##Loading packages

suppressPackageStartupMessages({
    library("R.utils")
    library("missMethyl")
    library("limma")
    library("topconfects")
    library("minfi")
    library("IlluminaHumanMethylation450kmanifest")
    library("MethylToSNP")
    library("RColorBrewer")
    library("IlluminaHumanMethylation450kanno.ilmn12.hg19")
    library("eulerr")
    library("plyr")
    library("gplots")
    library("reshape2")
    library("beeswarm")
  library("BSgenome")
library("MEDIPS")
  library("DESeq2")
library("BSgenome.Hsapiens.UCSC.hg19")
  library("ensembldb")
  library("EnsDb.Hsapiens.v75")
  library("GenomicRanges")
  })

#customised beeswarm chart for MEDIP Seq data
make_beeswarm2<- function(dm,name,mx,groups,n=15) {
    par(mar=c(3,3,1,1))
    NCOLS=5
    NROWS=floor(n/NCOLS)
    if (n %% NCOLS > 0) { NROWS <- NROWS + 1 }
    par(mfrow=c(NROWS, NCOLS))
    topgenes <-  rownames(head(dm[order(dm$pvalue),],n))
    mx<-mx/colSums(mx)*1e6
    ss <- mx[which(rownames(mx) %in% topgenes),]
    n <- 1:n
    g1name=levels(groups)[1]
    g2name=levels(groups)[2]
    g1dat <- ss[n,which(groups == g1name)]
    g2dat <- ss[n,which(groups == g2name)]
    g1l <-lapply(split(g1dat, row.names(g1dat)), unlist)
    g2l <-lapply(split(g2dat, row.names(g2dat)), unlist)
   for (i in n) {
      mydat <- list(g1l[[i]],g2l[[i]])
        beeswarm(mydat,cex=0.4, pch=19,
        las=2, cex.lab=0.6, main=names( g1l )[i] , 
        ylab="",labels = c(g1name,g2name))
      grid()
    }
}

#customised beeswarm chart(confects) for MEDIP Seq data
make_beeswarms_confects2 <- function(confects,name,mx,groups,n=15) {
    par(mar=c(3,3,1,1))
    NCOLS=5
    NROWS=floor(n/NCOLS)
    if (n %% NCOLS > 0) { NROWS <- NROWS + 1 }
    par(mfrow=c(NROWS, NCOLS))
    topgenes <-  head(confects$table,n)$name
    mx<-mx/colSums(mx)*1e6
    ss <- mx[which(rownames(mx) %in% topgenes),]
    n <- 1:n
    g1name=levels(groups)[1]
    g2name=levels(groups)[2]
    g1dat <- ss[n,which(groups == g1name)]
    g2dat <- ss[n,which(groups == g2name)]
    g1l <-lapply(split(g1dat, row.names(g1dat)), unlist)
    g2l <-lapply(split(g2dat, row.names(g2dat)), unlist)

    for (i in n) {
      mydat <- list(g1l[[i]],g2l[[i]])
        beeswarm(mydat,cex=0.4, pch=19,
        las=2, cex.lab=0.6, main=names( g1l )[i] , 
        ylab="",labels = c(g1name,g2name))
      grid()
    }
}

#Annotation
names(listTables(EnsDb.Hsapiens.v75))
##  [1] "gene"           "tx"             "tx2exon"        "exon"          
##  [5] "chromosome"     "protein"        "uniprot"        "protein_domain"
##  [9] "entrezgene"     "metadata"
ensgenes<-genes(EnsDb.Hsapiens.v75)
enstranscripts<-transcripts(EnsDb.Hsapiens.v75)
enspromoters<-promoters(EnsDb.Hsapiens.v75,columns=c("gene_id","gene_name"))
head(enspromoters)
## GRanges object with 6 ranges and 3 metadata columns:
##                   seqnames      ranges strand |         gene_id   gene_name
##                      <Rle>   <IRanges>  <Rle> |     <character> <character>
##   ENST00000456328        1  9869-12068      + | ENSG00000223972     DDX11L1
##   ENST00000515242        1  9872-12071      + | ENSG00000223972     DDX11L1
##   ENST00000518655        1  9874-12073      + | ENSG00000223972     DDX11L1
##   ENST00000450305        1 10010-12209      + | ENSG00000223972     DDX11L1
##   ENST00000438504        1 29171-31370      - | ENSG00000227232      WASH7P
##   ENST00000541675        1 24687-26886      - | ENSG00000227232      WASH7P
##                             tx_id
##                       <character>
##   ENST00000456328 ENST00000456328
##   ENST00000515242 ENST00000515242
##   ENST00000518655 ENST00000518655
##   ENST00000450305 ENST00000450305
##   ENST00000438504 ENST00000438504
##   ENST00000541675 ENST00000541675
##   -------
##   seqinfo: 273 sequences from GRCh37 genome
head(ensgenes)
## GRanges object with 6 ranges and 6 metadata columns:
##                   seqnames      ranges strand |         gene_id   gene_name
##                      <Rle>   <IRanges>  <Rle> |     <character> <character>
##   ENSG00000223972        1 11869-14412      + | ENSG00000223972     DDX11L1
##   ENSG00000227232        1 14363-29806      - | ENSG00000227232      WASH7P
##   ENSG00000243485        1 29554-31109      + | ENSG00000243485  MIR1302-10
##   ENSG00000237613        1 34554-36081      - | ENSG00000237613     FAM138A
##   ENSG00000268020        1 52473-54936      + | ENSG00000268020      OR4G4P
##   ENSG00000240361        1 62948-63887      + | ENSG00000240361     OR4G11P
##                   gene_biotype seq_coord_system      symbol
##                    <character>      <character> <character>
##   ENSG00000223972   pseudogene       chromosome     DDX11L1
##   ENSG00000227232   pseudogene       chromosome      WASH7P
##   ENSG00000243485      lincRNA       chromosome  MIR1302-10
##   ENSG00000237613      lincRNA       chromosome     FAM138A
##   ENSG00000268020   pseudogene       chromosome      OR4G4P
##   ENSG00000240361   pseudogene       chromosome     OR4G11P
##                                            entrezid
##                                              <list>
##   ENSG00000223972               100287596,100287102
##   ENSG00000227232                  100287171,653635
##   ENSG00000243485 100422919,100422834,100422831,...
##   ENSG00000237613              654835,645520,641702
##   ENSG00000268020                                NA
##   ENSG00000240361                                NA
##   -------
##   seqinfo: 273 sequences from GRCh37 genome

counts

counts<-read.table("~/mr.edgeR.c.test.tsv.gz",sep="\t",header=TRUE,row.names=1)
mycol<-max(grep(".bam.counts",colnames(counts)))
counts<-counts[,1:mycol]
rownames(counts)<-paste(counts$chr,counts$start, counts$stop)
counts[,1:4]=NULL
colnames(counts)<-gsub(".bam.counts","",colnames(counts))

metadata

sf1<-read.table("~/castillo_metadata/EGAD00001003159_metadata/delimited_maps/Sample_File.map",stringsAsFactors = FALSE) 
rs1<-read.table("~/castillo_metadata/EGAD00001003159_metadata/delimited_maps/Run_Sample_meta_info.map",sep=";",stringsAsFactors = FALSE)
rs1[,ncol(rs1)]=NULL
md1<-apply(rs1,2,function(x){
  sapply(strsplit(x,"="),"[[",2)
  })
headers<-t(rs1[1,])
colnames(md1)<-sapply(strsplit(headers[,1],"="), "[[",1)
rownames(md1)<-sf1[,1]
md1<-as.data.frame(md1)


sf2<-read.table("~/castillo_metadata/EGAD00001003158_metadata/delimited_maps/Sample_File.map",stringsAsFactors = FALSE) 
rs2<-read.table("~/castillo_metadata/EGAD00001003158_metadata/delimited_maps/Run_Sample_meta_info.map",sep=";",stringsAsFactors = FALSE)
rs2[,ncol(rs2)]=NULL
md2<-apply(rs2,2,function(x){
  sapply(strsplit(x,"="),"[[",2)
  })
headers<-t(rs2[1,])
colnames(md2)<-sapply(strsplit(headers[,1],"="), "[[",1)
rownames(md2)<-sf2[,1]
md2<-as.data.frame(md2)

md <- as.data.frame(rbind(md1,md2))

mdc <- md[grep("C",rownames(md)),]

mdw <- md[grep("W",rownames(md)),]

## CBMC
## number of natural conception 43
nrow(subset(mdc,medical_help_to_conceive=="no"))
## [1] 43
## number of ovarian stimulation is 36
nrow(subset(mdc,ovarian_stimulation=="yes"))
## [1] 36
## number of gamete intrafallopian transfer is 2
nrow(subset(mdc,gamete_intrafallopian_transfer=="yes"))
## [1] 2
## numberof intracytoplasmic sperm injection is 21
nrow(subset(mdc,intracytoplasmic_sperm_injection=="yes"))
## [1] 21
## number of ICSI frozen embryo is 9
nrow(subset(mdc,intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
## [1] 9
## number of ICSI fresh embryo is 12
nrow(subset(mdc,intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
## [1] 12
## WCB
## number of medical help to conceive is 54
nrow(subset(mdw,medical_help_to_conceive=="no"))
## [1] 54
## number of ovarian stimulation is 36
nrow(subset(mdw,ovarian_stimulation=="yes"))
## [1] 36
## number of gamete intrafallopian transfer is 2
nrow(subset(mdw,gamete_intrafallopian_transfer=="yes"))
## [1] 2
## numberof intracytoplasmic sperm injection is 18
nrow(subset(mdw,intracytoplasmic_sperm_injection=="yes"))
## [1] 18
## number of ICSI frozen embryo is 6
nrow(subset(mdw,intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
## [1] 6
## number of ICSI fresh embryo is 12
nrow(subset(mdw,intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
## [1] 12

based on the numbers, following are the contrasts

CBMC and WMBC

CBMC: Natural Vs Ovarian stimulation

NAME = "CBMC_natural_vs_ovarian_stimulation"
samplesheet<-subset(mdc, medical_help_to_conceive=="no" | ovarian_stimulation=="yes")
samplesheet$groups <- factor(samplesheet$ovarian_stimulation,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]
dim(y)
## [1] 1449   79
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                           baseMean log2FoldChange     lfcSE     stat
## chr9 128456101 128456200  9.821874      0.6481876 0.1300329 4.984798
## chr9 128456001 128456100 12.714885      0.5981254 0.1306666 4.577493
## chrM 8001 8100           55.563106      0.6261791 0.1385588 4.519232
## chr8 109249601 109249700 10.118409      0.5219290 0.1174502 4.443831
## chr3 30615401 30615500   15.888654      0.4223159 0.0953818 4.427636
## chrM 3201 3300           62.713180      0.5534153 0.1252776 4.417510
## chrM 15901 16000         80.205776      0.6469908 0.1465164 4.415826
## chr2 209767401 209767500 10.726289      0.5365324 0.1215893 4.412662
## chr1 54833001 54833100   11.211590      0.4753991 0.1080931 4.398054
## chrM 15801 15900         84.510736      0.6298150 0.1436210 4.385256
##                                pvalue         padj
## chr9 128456101 128456200 6.202647e-07 0.0008987636
## chr9 128456001 128456100 4.705822e-06 0.0011136337
## chrM 8001 8100           6.206443e-06 0.0011136337
## chr8 109249601 109249700 8.837092e-06 0.0011136337
## chr3 30615401 30615500   9.527139e-06 0.0011136337
## chrM 3201 3300           9.984425e-06 0.0011136337
## chrM 15901 16000         1.006251e-05 0.0011136337
## chr2 209767401 209767500 1.021073e-05 0.0011136337
## chr1 54833001 54833100   1.092260e-05 0.0011136337
## chrM 15801 15900         1.158492e-05 0.0011136337
confects <- deseq2_confects(res)
head(confects$table)
##   rank index confect    effect  baseMean                     name filtered
## 1    1     1    0.11 0.6481876  9.821874 chr9 128456101 128456200    FALSE
## 2    2     5    0.11 0.5981254 12.714885 chr9 128456001 128456100    FALSE
## 3    3    17    0.11 0.6261791 55.563106           chrM 8001 8100    FALSE
## 4    4    63    0.11 0.6469908 80.205776         chrM 15901 16000    FALSE
## 5    5    64    0.11 0.6298150 84.510736         chrM 15801 15900    FALSE
## 6    6    11    0.11 0.6080177 31.682753         chrM 16501 16600    FALSE
# optional
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")

# smear plot optional
plot(log2(dge$baseMean),dge$log2FoldChange,cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 base mean",
ylim=c(-3,3),ylab="log2 fold change"
,pch=19,col="#838383")
points(log2(sig$baseMean),sig$log2FoldChange,cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","os",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm1",mx=y,groups=mygroups,n=15)

make_beeswarms_confects2(confects=confects,name="dm1",mx=y,groups=mygroups,n=15)

# Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': M, Un_gl000211, Un_gl000214, Un_gl000219, Un_gl000237, Un_gl000224, Un_gl000220
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 436 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       54801 |         0
##     [2]         2       54801 |         0
##     [3]         3       52370 |         0
##     [4]         4       36552 |     32493
##     [5]         5       32158 |     17770
##     ...       ...         ... .       ...
##   [432]       432       16665 |      1264
##   [433]       433       45162 |         0
##   [434]       434       44871 |     98653
##   [435]       435       46046 |       764
##   [436]       436       34978 |         0
##   -------
##   queryLength: 436 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 436 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]        9 128456101-128456200      * |       MAPKAP1           0
##     [2]        9 128456001-128456100      * |       MAPKAP1           0
##     [3]        8 109249601-109249700      * |         EIF3E           0
##     [4]        3   30615401-30615500      * |        TGFBR2       32493
##     [5]        2 209767401-209767500      * |     RNA5SP117       17770
##     ...      ...                 ...    ... .           ...         ...
##   [432]       14   95551201-95551300      * |       LRG_492        1264
##   [433]        6   26385601-26385700      * |        BTN2A2           0
##   [434]        6     9025701-9025800      * | RP11-354I10.1       98653
##   [435]        6   52056201-52056300      * |         IL17A         764
##   [436]       22   18688601-18688700      * |    AC008079.9           0
##   -------
##   seqinfo: 29 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000216, Un_gl000231, Un_gl000224, Un_gl000225, Un_gl000214, 7_gl000195_random, Un_gl000240, Un_gl000241
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 833 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       18432 |         0
##     [2]         2       17062 |      2727
##     [3]         3        5610 |         0
##     [4]         4        7641 |         0
##     [5]         5       18432 |         0
##     ...       ...         ... .       ...
##   [829]       829       63088 |     19129
##   [830]       830       61918 |         0
##   [831]       831       62256 |         0
##   [832]       832       24800 |         0
##   [833]       833       61906 |       959
##   -------
##   queryLength: 833 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 833 ranges and 2 metadata columns:
##         seqnames              ranges strand |             gene    distance
##            <Rle>           <IRanges>  <Rle> |      <character> <DataFrame>
##     [1]       15   65461501-65461600      * |             CLPX           0
##     [2]       14 106061201-106061300      * |      AL928742.12        2727
##     [3]       10   12404601-12404700      * |           CAMK1D           0
##     [4]       10 134511601-134511700      * |           INPP5A           0
##     [5]       15   65461401-65461500      * |             CLPX           0
##     ...      ...                 ...    ... .              ...         ...
##   [829]        X 130983801-130983900      * | RP11-453F18__B.1       19129
##   [830]        X   49364801-49364900      * |            GAGE1           0
##   [831]        X   70608001-70608100      * |             TAF1           0
##   [832]       18     7725801-7725900      * |            PTPRM           0
##   [833]        X   49206101-49206200      * |           GAGE2D         959
##   -------
##   seqinfo: 31 sequences from an unspecified genome; no seqlengths

Natural Vs GIFT

NAME = "CBMC_natural_vs_gamete_intrafallopian_transfer"
samplesheet<-subset(mdc, medical_help_to_conceive=="no" | gamete_intrafallopian_transfer=="yes")
samplesheet$groups <- factor(samplesheet$gamete_intrafallopian_transfer,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr5 771901 772000         11.93588      1.2545816 0.3214614  3.902744
## chr17 57900901 57901000    10.19571      1.1338602 0.2967472  3.820964
## chrUn_gl000219 48201 48300 11.56128      1.3342415 0.3627180  3.678454
## chr4 114135401 114135500   13.02012      1.1889266 0.3274092  3.631317
## chr16 83260601 83260700    11.15672      1.2051794 0.3588543  3.358408
## chr16 83260701 83260800    10.35048      1.2704975 0.3791002  3.351350
## chr1 16963701 16963800     12.38379      0.8940748 0.2668147  3.350921
## chr17 1022101 1022200      15.26086     -2.0382549 0.6275773 -3.247815
## chr9 35059701 35059800     10.43422      1.0306126 0.3224758  3.195938
## chr2 87927401 87927500     10.81617      0.9131288 0.2914849  3.132679
##                                  pvalue      padj
## chr5 771901 772000         0.0000951084 0.1238252
## chr17 57900901 57901000    0.0001329310 0.1238252
## chrUn_gl000219 48201 48300 0.0002346517 0.1313317
## chr4 114135401 114135500   0.0002819789 0.1313317
## chr16 83260601 83260700    0.0007839268 0.2143605
## chr16 83260701 83260800    0.0008041852 0.2143605
## chr1 16963701 16963800     0.0008054339 0.2143605
## chr17 1022101 1022200      0.0011629492 0.2708218
## chr9 35059701 35059800     0.0013937693 0.2885103
## chr2 87927401 87927500     0.0017321865 0.2947524
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")

#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","gift",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm2",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dm2",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000219, Un_gl000237
##   - in 'y': 20, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 61 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       41851 |         0
##    [2]         2       23902 |         0
##    [3]         3       40875 |         0
##    [4]         4       21486 |         0
##    [5]         5       21486 |         0
##    ...       ...         ... .       ...
##   [57]        57       17655 |     18859
##   [58]        58       47137 |     18957
##   [59]        59         213 |     28578
##   [60]        60       36987 |      2847
##   [61]        61       39023 |    104693
##   -------
##   queryLength: 61 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 61 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]        5       771901-772000      * |       ZDHHC11           0
##    [2]       17   57900901-57901000      * |          VMP1           0
##    [3]        4 114135401-114135500      * |    AC004057.1           0
##    [4]       16   83260601-83260700      * |         CDH13           0
##    [5]       16   83260701-83260800      * |         CDH13           0
##    ...      ...                 ...    ... .           ...         ...
##   [57]       15   31541901-31542000      * |  RP11-16E12.2       18859
##   [58]        6 138464001-138464100      * |      KIAA1244       18957
##   [59]        1     4043901-4044000      * | RP13-614K11.1       28578
##   [60]        3   49501601-49501700      * |     RNA5SP130        2847
##   [61]        3 187281901-187282000      * |           SST      104693
##   -------
##   seqinfo: 23 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
ol_down
## Hits object with 40 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       21747 |         0
##    [2]         2       62108 |         0
##    [3]         3       41389 |         0
##    [4]         4       41389 |         0
##    [5]         5       18415 |         0
##    ...       ...         ... .       ...
##   [36]        36       17359 |      3720
##   [37]        37       62108 |         0
##   [38]        38       21291 |       246
##   [39]        39       62108 |         0
##   [40]        40       62175 |    280694
##   -------
##   queryLength: 40 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 40 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]       17     1022101-1022200      * |           ABR           0
##    [2]        X   56795901-56796000      * | RP11-622K12.1           0
##    [3]        4 156630301-156630400      * |       GUCY1A3           0
##    [4]        4 156630401-156630500      * |       GUCY1A3           0
##    [5]       15   65006301-65006400      * |    AC100830.3           0
##    ...      ...                 ...    ... .           ...         ...
##   [36]       15   22517001-22517100      * |      MIR1268A        3720
##   [37]        X   56796901-56797000      * | RP11-622K12.1           0
##   [38]       16   72059201-72059300      * |         DHODH         246
##   [39]        X   56796501-56796600      * | RP11-622K12.1           0
##   [40]        X   66176901-66177000      * |     RNU6-394P      280694
##   -------
##   seqinfo: 12 sequences from an unspecified genome; no seqlengths

Natural Vs ICSI fresh

NAME = "CBMC_natural_vs_intracytoplasmic_sperm_injection_fresh_embryo"
samplesheet<-subset(mdc, medical_help_to_conceive=="no" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]
head(samplesheet)
##       260.280_ratio birth_weight dna_concentration frozen_embryo
## C1223          1.83         2645            406.78            no
## C1229          1.73         2644            677.27            no
## C1331          1.71         2510            232.52            no
## C1332           1.7         1679            275.03            no
## C1333          1.74         2425            245.98            no
## C1334          1.81         3155            427.31            no
##       gamete_intrafallopian_transfer gender intracytoplasmic_sperm_injection
## C1223                             no female                               no
## C1229                             no female                               no
## C1331                             no female                               no
## C1332                             no female                               no
## C1333                             no female                               no
## C1334                             no   male                               no
##       maternal_age maternal_smoking medical_help_to_conceive mother_id
## C1223     29.14994               no                       no      3015
## C1229     37.33339              yes                       no      1033
## C1331     37.33339              yes                       no      1033
## C1332     34.04522               no                       no      1035
## C1333     34.04522               no                       no      1035
## C1334     25.14446              yes                       no      1049
##       ovarian_stimulation phenotype subject_id twin_no zygosity ENA-CHECKLIST
## C1223                  no   non-IVF      30151       1       MZ     ERC000026
## C1229                  no   non-IVF      10331       1       DZ     ERC000026
## C1331                  no   non-IVF      10332       2       DZ     ERC000026
## C1332                  no   non-IVF      10351       1       MZ     ERC000026
## C1333                  no   non-IVF      10352       2       MZ     ERC000026
## C1334                  no   non-IVF      10491       1       DZ     ERC000026
##       groups    sex
## C1223     no female
## C1229     no female
## C1331     no female
## C1332     no female
## C1333     no female
## C1334     no   male
# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                   baseMean log2FoldChange     lfcSE     stat       pvalue
## chrM 15901 16000  80.40743      1.0701990 0.1949769 5.488850 4.045582e-08
## chrM 15801 15900  84.83878      1.0395112 0.1928096 5.391386 6.991612e-08
## chrM 15701 15800 129.75449      0.8665500 0.1675253 5.172652 2.307942e-07
## chrM 9801 9900    33.63418      1.0435245 0.2049771 5.090932 3.563077e-07
## chrM 16001 16100 122.12572      0.8376368 0.1646069 5.088710 3.605076e-07
## chrM 9701 9800    35.70170      1.0274973 0.2027455 5.067916 4.021954e-07
## chrM 12701 12800  79.06658      0.8565974 0.1693641 5.057728 4.242804e-07
## chrM 8101 8200    58.74518      0.9484985 0.1897191 4.999489 5.748245e-07
## chrM 12601 12700 115.37154      0.7766378 0.1555086 4.994178 5.908674e-07
## chrM 10701 10800 217.62689      0.6416102 0.1309418 4.899966 9.585312e-07
##                          padj
## chrM 15901 16000 5.764584e-05
## chrM 15801 15900 5.764584e-05
## chrM 15701 15800 9.994833e-05
## chrM 9801 9900   9.994833e-05
## chrM 16001 16100 9.994833e-05
## chrM 9701 9800   9.994833e-05
## chrM 12701 12800 9.994833e-05
## chrM 8101 8200   1.082600e-04
## chrM 12601 12700 1.082600e-04
## chrM 10701 10800 1.203783e-04
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")

#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","icsi",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm3",mx=y,groups=mygroups,n=15)

make_beeswarms_confects2(confects=confects,name="dm3",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': M, Un_gl000214, Un_gl000211, Un_gl000237
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 322 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       30885 |         0
##     [2]         2       47137 |     18957
##     [3]         3          40 |         0
##     [4]         4       15971 |         0
##     [5]         5       19852 |         0
##     ...       ...         ... .       ...
##   [318]       318       39133 |         0
##   [319]       319       50554 |    104525
##   [320]       320       54801 |         0
##   [321]       321        5958 |         0
##   [322]       322       11027 |         0
##   -------
##   queryLength: 322 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 322 ranges and 2 metadata columns:
##         seqnames              ranges strand |        gene    distance
##            <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##     [1]        2 125398601-125398700      * |     CNTNAP5           0
##     [2]        6 138464001-138464100      * |    KIAA1244       18957
##     [3]        1       569601-569700      * |    MTATP6P1           0
##     [4]       14   58614001-58614100      * |    C14orf37           0
##     [5]       16   12452101-12452200      * |       SNX29           0
##     ...      ...                 ...    ... .         ...         ...
##   [318]        3 194851801-194851900      * |      XXYLT1           0
##   [319]        8     2218001-2218100      * |       MYOM2      104525
##   [320]        9 128455901-128456000      * |     MAPKAP1           0
##   [321]       10   33599801-33599900      * |        NRP1           0
##   [322]       12     3496101-3496200      * |       PRMT8           0
##   -------
##   seqinfo: 26 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000225
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 167 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       39965 |     28168
##     [2]         2       54801 |      5887
##     [3]         3       14995 |         0
##     [4]         4       49494 |         0
##     [5]         5       38772 |         0
##     ...       ...         ... .       ...
##   [163]       163       62855 |     34917
##   [164]       164         315 |     11633
##   [165]       165       52699 |    206599
##   [166]       166        7626 |    154414
##   [167]       167       17368 |         0
##   -------
##   queryLength: 167 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 167 ranges and 2 metadata columns:
##         seqnames              ranges strand |           gene    distance
##            <Rle>           <IRanges>  <Rle> |    <character> <DataFrame>
##     [1]        4   49277101-49277200      * | RP11-1281K21.7       28168
##     [2]        9 128475401-128475500      * |        MAPKAP1        5887
##     [3]       13 114771701-114771800      * |          RASA3           0
##     [4]        7 102439401-102439500      * |        FAM185A           0
##     [5]        3 175318401-175318500      * |       NAALADL2           0
##     ...      ...                 ...    ... .            ...         ...
##   [163]        X 114994301-114994400      * |   RP1-241P17.1       34917
##   [164]        1     9283101-9283200      * |           H6PD       11633
##   [165]        8 138324301-138324400      * |      RNU6-144P      206599
##   [166]       10 133335001-133335100      * |     AL450307.2      154414
##   [167]       15   22738801-22738900      * |       GOLGA6L1           0
##   -------
##   seqinfo: 24 sequences from an unspecified genome; no seqlengths

Natural Vs ICSI frozen

NAME = "CBMC_natural_vs_intracytoplasmic_sperm_injection_frozen_embryo"
samplesheet<-subset(mdc, medical_help_to_conceive=="no" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                          baseMean log2FoldChange     lfcSE     stat
## chr9 42004901 42005000   11.62129      0.8150068 0.1522669 5.352489
## chr16 21582801 21582900  12.16087      0.7208933 0.1582875 4.554328
## chr2 238790601 238790700 11.38491      0.7958142 0.1793087 4.438235
## chr9 42004801 42004900   18.32877      0.6162550 0.1403154 4.391926
## chr14 70813401 70813500  13.93291      0.6546633 0.1507850 4.341702
## chr10 86611901 86612000  11.73915      0.6847153 0.1577329 4.340978
## chr21 11096201 11096300  16.00287      0.6773271 0.1563348 4.332542
## chr15 33227601 33227700  10.82165      0.6938120 0.1618929 4.285625
## chr16 21582901 21583000  11.35963      0.6882666 0.1627079 4.230074
## chr7 57832601 57832700   15.72667      0.5589624 0.1350924 4.137629
##                                pvalue         padj
## chr9 42004901 42005000   8.675278e-08 0.0001448771
## chr16 21582801 21582900  5.255325e-06 0.0035164779
## chr2 238790601 238790700 9.069965e-06 0.0035164779
## chr9 42004801 42004900   1.123509e-05 0.0035164779
## chr14 70813401 70813500  1.413833e-05 0.0035164779
## chr10 86611901 86612000  1.418496e-05 0.0035164779
## chr21 11096201 11096300  1.473973e-05 0.0035164779
## chr15 33227601 33227700  1.822264e-05 0.0038039760
## chr16 21582901 21583000  2.336145e-05 0.0043348461
## chr7 57832601 57832700   3.509136e-05 0.0058258879
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")

#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm4",mx=y,groups=mygroups,n=15)

make_beeswarms_confects2(confects=confects,name="dm4",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': M, Un_gl000237, Un_gl000211, Un_gl000224
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 298 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       53616 |         0
##     [2]         2       20069 |     16047
##     [3]         3       32662 |         0
##     [4]         4       53616 |         0
##     [5]         5       16222 |         0
##     ...       ...         ... .       ...
##   [294]       294       34598 |         0
##   [295]       295        3861 |         0
##   [296]       296       39153 |         0
##   [297]       297       41687 |     23046
##   [298]       298        5884 |         0
##   -------
##   queryLength: 298 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 298 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]        9   42004901-42005000      * |  RP11-204M4.2           0
##     [2]       16   21582801-21582900      * |       SCARNA6       16047
##     [3]        2 238790601-238790700      * |         RAMP1           0
##     [4]        9   42004801-42004900      * |  RP11-204M4.2           0
##     [5]       14   70813401-70813500      * | SYNJ2BP-COX16           0
##     ...      ...                 ...    ... .           ...         ...
##   [294]       21   38990401-38990500      * |         KCNJ6           0
##   [295]        1 169538301-169538400      * |            F5           0
##   [296]        3 195425401-195425500      * |     LINC00969           0
##   [297]        4 184526501-184526600      * |        snoU13       23046
##   [298]       10   29797801-29797900      * |          SVIL           0
##   -------
##   seqinfo: 26 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224, Un_gl000231
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 135 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       17363 |      3147
##     [2]         2       21448 |     31397
##     [3]         3       14995 |         0
##     [4]         4       38578 |         0
##     [5]         5       12222 |         0
##     ...       ...         ... .       ...
##   [131]       131       20192 |         0
##   [132]       132       14956 |         0
##   [133]       133       28820 |     56421
##   [134]       134        1131 |         0
##   [135]       135       61537 |         0
##   -------
##   queryLength: 135 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 135 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]       15   22671901-22672000      * |     MIR4509-1        3147
##     [2]       16   80978201-80978300      * |          CMC2       31397
##     [3]       13 114794601-114794700      * |         RASA3           0
##     [4]        3 158818901-158819000      * |          IQCJ           0
##     [5]       12   54883001-54883100      * | RP11-753H16.3           0
##     ...      ...                 ...    ... .           ...         ...
##   [131]       16   28058001-28058100      * |         GSG1L           0
##   [132]       13 113397101-113397200      * |        ATP11A           0
##   [133]        2     8627201-8627300      * |    AC011747.3       56421
##   [134]        1   35382901-35383000      * |        DLGAP3           0
##   [135]        X   27640301-27640400      * |       DCAF8L2           0
##   -------
##   seqinfo: 25 sequences from an unspecified genome; no seqlengths

ICSI fresh Vs ICSI frozen

NAME = "CBMC_intracytoplasmic_sperm_injection_fresh_embryo_vs_intracytoplasmic_sperm_injection_frozen_embryo"
samplesheet<-subset(mdc, intracytoplasmic_sperm_injection=="yes")
samplesheet$groups <- factor(samplesheet$frozen_embryo ,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]


# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr3 195420501 195420600   13.98074     -0.9538785 0.2278067 -4.187227
## chr3 195420601 195420700   13.35609     -0.8119696 0.2691280 -3.017039
## chr3 195420401 195420500   15.35303     -0.6085713 0.2028382 -3.000279
## chrUn_gl000214 63901 64000 22.02004     -0.5738894 0.2044693 -2.806727
## chr20 3077101 3077200      12.65230      0.5247725 0.2058575  2.549203
## chr16 12451901 12452000    10.33107     -0.6182277 0.2521667 -2.451663
## chr9 42416101 42416200     11.49863     -0.6769743 0.2802809 -2.415343
## chr9 44070601 44070700     28.10471     -0.5783527 0.2398014 -2.411799
## chr6 104197201 104197300   11.69186      0.5733055 0.2396681  2.392081
## chr5 754601 754700         23.74781      1.3014790 0.5521050  2.357303
##                                  pvalue       padj
## chr3 195420501 195420600   2.823828e-05 0.03591909
## chr3 195420601 195420700   2.552573e-03 0.99236223
## chr3 195420401 195420500   2.697322e-03 0.99236223
## chrUn_gl000214 63901 64000 5.004770e-03 0.99236223
## chr20 3077101 3077200      1.079695e-02 0.99236223
## chr16 12451901 12452000    1.421978e-02 0.99236223
## chr9 42416101 42416200     1.572042e-02 0.99236223
## chr9 44070601 44070700     1.587403e-02 0.99236223
## chr6 104197201 104197300   1.675315e-02 0.99236223
## chr5 754601 754700         1.840820e-02 0.99236223
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","icsi.fh",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm5",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dm5",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
ol_up
## Hits object with 19 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       32885 |      1947
##    [2]         2       46615 |    166057
##    [3]         3       41852 |         0
##    [4]         4       53616 |         0
##    [5]         5       19939 |         0
##    ...       ...         ... .       ...
##   [15]        15       46615 |    165957
##   [16]        16       32607 |         0
##   [17]        17       53616 |         0
##   [18]        18       18072 |     62330
##   [19]        19       17368 |      6745
##   -------
##   queryLength: 19 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 19 ranges and 2 metadata columns:
##        seqnames              ranges strand |         gene    distance
##           <Rle>           <IRanges>  <Rle> |  <character> <DataFrame>
##    [1]       20     3077101-3077200      * |    RN7SL555P        1947
##    [2]        6 104197201-104197300      * |      SNORA33      166057
##    [3]        5       754601-754700      * |     ZDHHC11B           0
##    [4]        9   42004901-42005000      * | RP11-204M4.2           0
##    [5]       16   15799501-15799600      * |        MYH11           0
##    ...      ...                 ...    ... .          ...         ...
##   [15]        6 104197101-104197200      * |      SNORA33      165957
##   [16]        2 234756001-234756100      * |        HJURP           0
##   [17]        9   42005001-42005100      * | RP11-204M4.2           0
##   [18]       15   47187301-47187400      * |   AC087433.1       62330
##   [19]       15   22729401-22729500      * |     GOLGA6L1        6745
##   -------
##   seqinfo: 11 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000214, M, Un_gl000224
##   - in 'y': 1, 10, 13, 14, 17, 18, 19, 2, 20, 21, 4, 5, 8, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 17 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       39153 |         0
##    [2]         2       39153 |         0
##    [3]         3       39153 |         0
##    [4]         4       19852 |         0
##    [5]         5       53630 |         0
##    ...       ...         ... .       ...
##   [13]        13       17363 |      3147
##   [14]        14       47137 |     18957
##   [15]        15       11368 |     13337
##   [16]        16       35621 |      5957
##   [17]        17       19852 |         0
##   -------
##   queryLength: 17 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 17 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]        3 195420501-195420600      * |     LINC00969           0
##    [2]        3 195420601-195420700      * |     LINC00969           0
##    [3]        3 195420401-195420500      * |     LINC00969           0
##    [4]       16   12451901-12452000      * |         SNX29           0
##    [5]        9   42416101-42416200      * | RP11-146D12.2           0
##    ...      ...                 ...    ... .           ...         ...
##   [13]       15   22671901-22672000      * |     MIR4509-1        3147
##   [14]        6 138464001-138464100      * |      KIAA1244       18957
##   [15]       12   12455601-12455700      * |    RNU6-1295P       13337
##   [16]       22   32717101-32717200      * | RP1-149A16.12        5957
##   [17]       16   12452101-12452200      * |         SNX29           0
##   -------
##   seqinfo: 13 sequences from an unspecified genome; no seqlengths

ovarian stimulation vs GIFT

NAME = "CBMC_ovarian_stimulation_vs_gamete_intrafallopian_transfer"
samplesheet<-subset(mdc, ovarian_stimulation=="yes" | gamete_intrafallopian_transfer=="yes")
samplesheet$groups <- factor(samplesheet$gamete_intrafallopian_transfer,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr15 22517001 22517100   19.836003     -1.5072261 0.4686102 -3.216375
## chr15 22516801 22516900   15.616048     -1.4854422 0.5008465 -2.965863
## chr15 22457901 22458000   11.264676     -1.6656359 0.6003636 -2.774379
## chrX 56795901 56796000    10.770330     -1.7445134 0.6623537 -2.633809
## chrX 56802601 56802700    12.438709     -1.6276618 0.6211741 -2.620299
## chr17 1022101 1022200     10.649288     -1.8671760 0.7157074 -2.608854
## chr2 209767401 209767500  12.053792     -1.2696311 0.4900442 -2.590850
## chr11 121887901 121888000 11.234663     -2.5346499 0.9813459 -2.582830
## chr5 771901 772000        15.126876      1.1039805 0.4422674  2.496183
## chr2 59050301 59050400     9.703602      0.8342431 0.3384117  2.465172
##                                pvalue    padj
## chr15 22517001 22517100   0.001298211 0.99119
## chr15 22516801 22516900   0.003018346 0.99119
## chr15 22457901 22458000   0.005530723 0.99119
## chrX 56795901 56796000    0.008443291 0.99119
## chrX 56802601 56802700    0.008785279 0.99119
## chr17 1022101 1022200     0.009084600 0.99119
## chr2 209767401 209767500  0.009573910 0.99119
## chr11 121887901 121888000 0.009799354 0.99119
## chr5 771901 772000        0.012553767 0.99119
## chr2 59050301 59050400    0.013694750 0.99119
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue",pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
confects <- deseq2_confects(res)
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","os",mygroups)
mygroups<-gsub("yes","gift",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm6",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dm6",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000219
##   - in 'y': 11, 13, 14, 18, 20, 21, 22, 3, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 19 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       41851 |         0
##    [2]         2       29618 |         0
##    [3]         3       28947 |         0
##    [4]         4         782 |      7917
##    [5]         5       46046 |       764
##    ...       ...         ... .       ...
##   [15]        15        5610 |         0
##   [16]        16       47711 |         0
##   [17]        17       53426 |         0
##   [18]        18       17552 |      2970
##   [19]        19       22550 |         0
##   -------
##   queryLength: 19 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 19 ranges and 2 metadata columns:
##        seqnames            ranges strand |         gene    distance
##           <Rle>         <IRanges>  <Rle> |  <character> <DataFrame>
##    [1]        5     771901-772000      * |      ZDHHC11           0
##    [2]        2 59050301-59050400      * |    LINC01122           0
##    [3]        2 16137301-16137400      * |   AC010145.4           0
##    [4]        1 24546101-24546200      * | RP11-10N16.2        7917
##    [5]        6 52056201-52056300      * |        IL17A         764
##    ...      ...               ...    ... .          ...         ...
##   [15]       10 12404701-12404800      * |       CAMK1D           0
##   [16]        7   4118301-4118400      * |         SDK1           0
##   [17]        9 35059701-35059800      * |          VCP           0
##   [18]       15 28674701-28674800      * |    MIR4509-3        2970
##   [19]       17 21072101-21072200      * |       DHRS7B           0
##   -------
##   seqinfo: 15 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': M
##   - in 'y': 1, 10, 13, 14, 16, 18, 19, 20, 21, 22, 3, 5, 6, 7, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 23 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       17359 |      3720
##    [2]         2       17359 |      3520
##    [3]         3       17354 |         0
##    [4]         4       62108 |         0
##    [5]         5       62108 |         0
##    ...       ...         ... .       ...
##   [19]        19       21747 |         0
##   [20]        20       62108 |         0
##   [21]        21       62108 |         0
##   [22]        22       51752 |     55969
##   [23]        23       11497 |     34153
##   -------
##   queryLength: 23 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 23 ranges and 2 metadata columns:
##        seqnames            ranges strand |          gene    distance
##           <Rle>         <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]       15 22517001-22517100      * |      MIR1268A        3720
##    [2]       15 22516801-22516900      * |      MIR1268A        3520
##    [3]       15 22457901-22458000      * |    AC010760.1           0
##    [4]        X 56795901-56796000      * | RP11-622K12.1           0
##    [5]        X 56802601-56802700      * | RP11-622K12.1           0
##    ...      ...               ...    ... .           ...         ...
##   [19]       17   1022201-1022300      * |           ABR           0
##   [20]        X 56802401-56802500      * | RP11-622K12.1           0
##   [21]        X 56803601-56803700      * | RP11-622K12.1           0
##   [22]        8 70281201-70281300      * | RP11-744J10.3       55969
##   [23]       12 20316901-20317000      * |   CTC-465D4.1       34153
##   -------
##   seqinfo: 9 sequences from an unspecified genome; no seqlengths

ovarian stimulation vs ICSI fresh

NAME = "CBMC_ovarian_stimulation_vs_intracytoplasmic_sperm_injection_fresh_embryo"
samplesheet<-subset(mdc, ovarian_stimulation=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                               baseMean log2FoldChange     lfcSE      stat
## chrUn_gl000224 122201 122300 10.557247     -0.9367754 0.2330955 -4.018849
## chrUn_gl000224 122101 122200  9.860153     -0.9160608 0.2347230 -3.902732
## chrUn_gl000219 46801 46900   22.540470     -0.4425577 0.1233755 -3.587080
## chrUn_gl000219 69701 69800   11.086934     -0.6714399 0.2259601 -2.971498
## chrUn_gl000224 96501 96600   34.499130     -0.6074352 0.2184618 -2.780510
## chrUn_gl000224 172901 173000 14.072586     -0.6593878 0.2429459 -2.714134
## chr11 91819701 91819800      13.211128     -0.4176278 0.1547081 -2.699456
## chr15 45983901 45984000       9.656976     -0.4715065 0.1768910 -2.665520
## chr8 14280901 14281000       13.421089     -0.4006120 0.1507596 -2.657291
## chrUn_gl000224 172301 172400 23.627497     -0.6112723 0.2351928 -2.599026
##                                    pvalue       padj
## chrUn_gl000224 122201 122300 5.848318e-05 0.05854211
## chrUn_gl000224 122101 122200 9.511310e-05 0.05854211
## chrUn_gl000219 46801 46900   3.344013e-04 0.13721598
## chrUn_gl000219 69701 69800   2.963507e-03 0.91201941
## chrUn_gl000224 96501 96600   5.427353e-03 0.99810330
## chrUn_gl000224 172901 173000 6.644918e-03 0.99810330
## chr11 91819701 91819800      6.945302e-03 0.99810330
## chr15 45983901 45984000      7.686947e-03 0.99810330
## chr8 14280901 14281000       7.877152e-03 0.99810330
## chrUn_gl000224 172301 172400 9.348864e-03 0.99810330
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","os",mygroups)
mygroups<-gsub("yes","icsi.fh",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm7",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dm7",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224
##   - in 'y': 10, 11, 14, 15, 16, 17, 18, 19, 20, 4, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 16 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       35058 |      4705
##    [2]         2       28875 |         0
##    [3]         3       63283 |         0
##    [4]         4       51017 |         0
##    [5]         5       41851 |         0
##    ...       ...         ... .       ...
##   [12]        12       43839 |         0
##   [13]        13       13600 |         0
##   [14]        14       47711 |         0
##   [15]        15       50555 |    108791
##   [16]        16       39129 |         0
##   -------
##   queryLength: 16 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 16 ranges and 2 metadata columns:
##        seqnames              ranges strand |        gene    distance
##           <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##    [1]       22   20361401-20361500      * |      GGTLC3        4705
##    [2]        2   10767601-10767700      * |       NOL10           0
##    [3]        X 140956801-140956900      * |      MAGEC3           0
##    [4]        8   23421301-23421400      * |    SLC25A37           0
##    [5]        5       771901-772000      * |     ZDHHC11           0
##    ...      ...                 ...    ... .         ...         ...
##   [12]        5 138660801-138660900      * |       MATR3           0
##   [13]       12 125021101-125021200      * |       NCOR2           0
##   [14]        7     4118401-4118500      * |        SDK1           0
##   [15]        8     2242101-2242200      * |  AC133633.2      108791
##   [16]        3 194501101-194501200      * |  AC090505.6           0
##   -------
##   seqinfo: 13 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224, Un_gl000219
##   - in 'y': 1, 12, 14, 16, 17, 18, 19, 20, 21, 22, 3, 5, 7, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 12 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       10095 |     74471
##    [2]         2       18063 |       408
##    [3]         3       50861 |         0
##    [4]         4       46615 |    166057
##    [5]         5       10095 |     74371
##    ...       ...         ... .       ...
##    [8]         8       28749 |         0
##    [9]         9        7501 |      8817
##   [10]        10       41242 |      5377
##   [11]        11       14843 |         0
##   [12]        12       28749 |         0
##   -------
##   queryLength: 12 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 12 ranges and 2 metadata columns:
##        seqnames              ranges strand |         gene    distance
##           <Rle>           <IRanges>  <Rle> |  <character> <DataFrame>
##    [1]       11   91819701-91819800      * |     RPL7AP57       74471
##    [2]       15   45983901-45984000      * |        SQRDL         408
##    [3]        8   14280901-14281000      * |      LRG_208           0
##    [4]        6 104197201-104197300      * |      SNORA33      166057
##    [5]       11   91819801-91819900      * |     RPL7AP57       74371
##    ...      ...                 ...    ... .          ...         ...
##    [8]        2     3082101-3082200      * |   AC019118.2           0
##    [9]       10 123486901-123487000      * | RP11-78A18.2        8817
##   [10]        4 147448501-147448600      * |      SLC10A7        5377
##   [11]       13 101727001-101727100      * |        NALCN           0
##   [12]        2     3082201-3082300      * |   AC019118.2           0
##   -------
##   seqinfo: 10 sequences from an unspecified genome; no seqlengths

ovarian stimulation vs ICSI frozen

NAME = "CBMC_ovarian_stimulation_vs_intracytoplasmic_sperm_injection_frozen_embryo"
samplesheet<-subset(mdc, ovarian_stimulation=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                               baseMean log2FoldChange     lfcSE      stat
## chrUn_gl000224 122201 122300 10.491565     -0.9596579 0.2363674 -4.060027
## chrUn_gl000224 122101 122200  9.776760     -0.9458815 0.2372979 -3.986050
## chrUn_gl000219 46801 46900   22.586278     -0.4419041 0.1250299 -3.534388
## chrUn_gl000224 96501 96600   34.081549     -0.6516785 0.2163350 -3.012358
## chrUn_gl000219 69701 69800   11.032325     -0.6843652 0.2293089 -2.984468
## chr15 45983901 45984000       9.614643     -0.4905039 0.1784562 -2.748596
## chrUn_gl000224 172901 173000 14.004084     -0.6739810 0.2462707 -2.736748
## chr11 91819701 91819800      13.264045     -0.4205049 0.1556997 -2.700743
## chrUn_gl000219 3301 3400     14.622256     -0.6318173 0.2359205 -2.678094
## chrUn_gl000224 96401 96500   30.338126     -0.5997649 0.2258659 -2.655403
##                                    pvalue       padj
## chrUn_gl000224 122201 122300 4.906704e-05 0.04121632
## chrUn_gl000224 122101 122200 6.718227e-05 0.04121632
## chrUn_gl000219 46801 46900   4.087207e-04 0.16716677
## chrUn_gl000224 96501 96600   2.592267e-03 0.69711224
## chrUn_gl000219 69701 69800   2.840718e-03 0.69711224
## chr15 45983901 45984000      5.985118e-03 0.77071254
## chrUn_gl000224 172901 173000 6.204974e-03 0.77071254
## chr11 91819701 91819800      6.918485e-03 0.77071254
## chrUn_gl000219 3301 3400     7.404250e-03 0.77071254
## chrUn_gl000224 96401 96500   7.921373e-03 0.77071254
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","os",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm8",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dm8",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224
##   - in 'y': 10, 11, 14, 15, 16, 17, 18, 19, 20, 3, 4, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 17 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       28875 |         0
##    [2]         2       31333 |     16431
##    [3]         3       35058 |      4705
##    [4]         4       63283 |         0
##    [5]         5       51017 |         0
##    ...       ...         ... .       ...
##   [13]        13       43839 |         0
##   [14]        14       50555 |    108791
##   [15]        15       50560 |         0
##   [16]        16       13600 |         0
##   [17]        17       47711 |         0
##   -------
##   queryLength: 17 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 17 ranges and 2 metadata columns:
##        seqnames              ranges strand |        gene    distance
##           <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##    [1]        2   10767601-10767700      * |       NOL10           0
##    [2]        2 157888201-157888300      * |  AC108057.1       16431
##    [3]       22   20361401-20361500      * |      GGTLC3        4705
##    [4]        X 140956801-140956900      * |      MAGEC3           0
##    [5]        8   23421301-23421400      * |    SLC25A37           0
##    ...      ...                 ...    ... .         ...         ...
##   [13]        5 138660801-138660900      * |       MATR3           0
##   [14]        8     2242101-2242200      * |  AC133633.2      108791
##   [15]        8     3569501-3569600      * |       CSMD1           0
##   [16]       12 125021101-125021200      * |       NCOR2           0
##   [17]        7     4118401-4118500      * |        SDK1           0
##   -------
##   seqinfo: 12 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224, Un_gl000219
##   - in 'y': 1, 12, 14, 16, 17, 18, 19, 20, 21, 22, 3, 5, 7, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 12 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       18063 |       408
##    [2]         2       10095 |     74471
##    [3]         3       50861 |         0
##    [4]         4       10095 |     74371
##    [5]         5       46615 |    166057
##    ...       ...         ... .       ...
##    [8]         8       46615 |    165957
##    [9]         9       28754 |         0
##   [10]        10       28749 |         0
##   [11]        11       41242 |      5377
##   [12]        12       14843 |         0
##   -------
##   queryLength: 12 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 12 ranges and 2 metadata columns:
##        seqnames              ranges strand |        gene    distance
##           <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##    [1]       15   45983901-45984000      * |       SQRDL         408
##    [2]       11   91819701-91819800      * |    RPL7AP57       74471
##    [3]        8   14280901-14281000      * |     LRG_208           0
##    [4]       11   91819801-91819900      * |    RPL7AP57       74371
##    [5]        6 104197201-104197300      * |     SNORA33      166057
##    ...      ...                 ...    ... .         ...         ...
##    [8]        6 104197101-104197200      * |     SNORA33      165957
##    [9]        2     3258201-3258300      * |       TSSC1           0
##   [10]        2     3082101-3082200      * |  AC019118.2           0
##   [11]        4 147448501-147448600      * |     SLC10A7        5377
##   [12]       13 101727001-101727100      * |       NALCN           0
##   -------
##   seqinfo: 10 sequences from an unspecified genome; no seqlengths

GIFT vs ICSI fresh

NAME = "CBMC_gamete_intrafallopian_transfer_vs_intracytoplasmic_sperm_injection_fresh_embryo"
samplesheet<-subset(mdc, gamete_intrafallopian_transfer=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr5 180085501 180085600   16.60962      -1.653730 0.4510497 -3.666403
## chr5 180085301 180085400   16.38130      -1.344157 0.3699535 -3.633314
## chr5 180085401 180085500   18.80753      -1.497292 0.4207640 -3.558508
## chr15 22517001 22517100    18.95745       1.581406 0.4902676  3.225597
## chr5 180085201 180085300   12.10385      -1.197424 0.3778276 -3.169235
## chr15 22516801 22516900    15.09151       1.637913 0.5225503  3.134460
## chrX 56795901 56796000     11.08498       1.823778 0.5958015  3.061050
## chrX 56802601 56802700     12.71956       1.611248 0.5479423  2.940543
## chrX 56803601 56803700     15.46328       1.206362 0.4399174  2.742247
## chrUn_gl000218 19001 19100 10.64531       1.860957 0.6805514  2.734484
##                                  pvalue      padj
## chr5 180085501 180085600   0.0002459859 0.1685816
## chr5 180085301 180085400   0.0002798045 0.1685816
## chr5 180085401 180085500   0.0003729681 0.1685816
## chr15 22517001 22517100    0.0012571005 0.3891065
## chr5 180085201 180085300   0.0015284095 0.3891065
## chr15 22516801 22516900    0.0017217101 0.3891065
## chrX 56795901 56796000     0.0022056203 0.4272602
## chrX 56802601 56802700     0.0032763746 0.5553455
## chrX 56803601 56803700     0.0061020353 0.8472028
## chrUn_gl000218 19001 19100 0.0062478086 0.8472028
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","gift",mygroups)
mygroups<-gsub("yes","icsi.fh",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm9",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dm9",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000218, M
##   - in 'y': 1, 10, 11, 12, 14, 16, 17, 18, 19, 2, 20, 21, 22, 4, 5, 7, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 17 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       17359 |      3720
##    [2]         2       17359 |      3520
##    [3]         3       62108 |         0
##    [4]         4       62108 |         0
##    [5]         5       62108 |         0
##    ...       ...         ... .       ...
##   [13]        13       17359 |      9820
##   [14]        14       47622 |      3124
##   [15]        15       14877 |    159377
##   [16]        16       39153 |         0
##   [17]        17       52489 |     21610
##   -------
##   queryLength: 17 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 17 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]       15   22517001-22517100      * |      MIR1268A        3720
##    [2]       15   22516801-22516900      * |      MIR1268A        3520
##    [3]        X   56795901-56796000      * | RP11-622K12.1           0
##    [4]        X   56802601-56802700      * | RP11-622K12.1           0
##    [5]        X   56803601-56803700      * | RP11-622K12.1           0
##    ...      ...                 ...    ... .           ...         ...
##   [13]       15   22523101-22523200      * |      MIR1268A        9820
##   [14]        6 171040701-171040800      * | XX-C2158C12.2        3124
##   [15]       13 105307201-105307300      * |       RPL7P45      159377
##   [16]        3 195420601-195420700      * |     LINC00969           0
##   [17]        8 123462401-123462500      * |  RP11-94A24.1       21610
##   -------
##   seqinfo: 8 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000219
##   - in 'y': 1, 10, 11, 12, 13, 15, 19, 20, 21, 22, 3, 4, 8, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 17 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       44657 |      8876
##    [2]         2       44657 |      8676
##    [3]         3       44657 |      8776
##    [4]         4       44657 |      8576
##    [5]         5       25758 |      6997
##    ...       ...         ... .       ...
##   [13]        13       41851 |         0
##   [14]        14       16367 |      1430
##   [15]        15       29618 |         0
##   [16]        16       47809 |      3265
##   [17]        17       21486 |         0
##   -------
##   queryLength: 17 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 17 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]        5 180085501-180085600      * |          FLT4        8876
##    [2]        5 180085301-180085400      * |          FLT4        8676
##    [3]        5 180085401-180085500      * |          FLT4        8776
##    [4]        5 180085201-180085300      * |          FLT4        8576
##    [5]       18   77379301-77379400      * | RP11-567M16.2        6997
##    ...      ...                 ...    ... .           ...         ...
##   [13]        5       771901-772000      * |       ZDHHC11           0
##   [14]       14   76039701-76039800      * |    AC007182.6        1430
##   [15]        2   59050301-59050400      * |     LINC01122           0
##   [16]        7     7111801-7111900      * |    AC092104.4        3265
##   [17]       16   83260701-83260800      * |         CDH13           0
##   -------
##   seqinfo: 10 sequences from an unspecified genome; no seqlengths

GIFT vs ICSI frozen

NAME = "CBMC_gamete_intrafallopian_transfer_vs_intracytoplasmic_sperm_injection_frozen_embryo"
samplesheet<-subset(mdc, gamete_intrafallopian_transfer=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE     stat
## chrM 14901 15000           85.40041      1.1496511 0.3181672 3.613355
## chr11 121887901 121888000  11.54641      2.9141594 0.8438210 3.453528
## chrM 9901 10000            28.68350      1.2758382 0.3813172 3.345871
## chrM 10501 10600           93.49152      0.9004232 0.2723920 3.305616
## chrM 9801 9900             30.37192      1.1998185 0.3851548 3.115159
## chr15 22457901 22458000    11.36440      2.0659323 0.6884990 3.000632
## chrM 9701 9800             33.16048      1.1498672 0.3896752 2.950835
## chrM 10601 10700          146.18615      0.6872482 0.2360390 2.911588
## chrM 3201 3300             69.61076      0.8435279 0.2912254 2.896478
## chrM 201 300              125.10452      0.7347937 0.2545862 2.886228
##                                 pvalue       padj
## chrM 14901 15000          0.0003022609 0.03083061
## chr11 121887901 121888000 0.0005533052         NA
## chrM 9901 10000           0.0008202468         NA
## chrM 10501 10600          0.0009476797 0.04833167
## chrM 9801 9900            0.0018384600 0.05019786
## chr15 22457901 22458000   0.0026941974         NA
## chrM 9701 9800            0.0031691602 0.05019786
## chrM 10601 10700          0.0035959714 0.05019786
## chrM 3201 3300            0.0037737751 0.05019786
## chrM 201 300              0.0038988974 0.05019786
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","gift",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dm10",mx=y,groups=mygroups,n=15)

make_beeswarms_confects2(confects=confects,name="dm10",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': M
##   - in 'y': 1, 10, 12, 13, 14, 16, 18, 19, 20, 21, 22, 3, 5, 7, 8, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 14 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       10659 |     11062
##    [2]         2       17354 |         0
##    [3]         3       17359 |      3720
##    [4]         4       39983 |     45865
##    [5]         5       17359 |      3520
##    ...       ...         ... .       ...
##   [10]        10       47622 |      2924
##   [11]        11       24156 |         0
##   [12]        12       21747 |         0
##   [13]        13       62108 |         0
##   [14]        14       29066 |      2282
##   -------
##   queryLength: 14 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 14 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]       11 121887901-121888000      * | RP11-166D19.1       11062
##    [2]       15   22457901-22458000      * |    AC010760.1           0
##    [3]       15   22517001-22517100      * |      MIR1268A        3720
##    [4]        4   52663201-52663300      * |       DCUN1D4       45865
##    [5]       15   22516801-22516900      * |      MIR1268A        3520
##    ...      ...                 ...    ... .           ...         ...
##   [10]        6 171040501-171040600      * | XX-C2158C12.2        2924
##   [11]       17   66261301-66261400      * |          ARSG           0
##   [12]       17     1022201-1022300      * |           ABR           0
##   [13]        X   56802501-56802600      * | RP11-622K12.1           0
##   [14]        2   24712401-24712500      * |         NCOA1        2282
##   -------
##   seqinfo: 8 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000219
##   - in 'y': 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 3, 4, 6, 7, 9, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 5 hits and 1 metadata column:
##       queryHits subjectHits |  distance
##       <integer>   <integer> | <integer>
##   [1]         1       41851 |         0
##   [2]         2       52744 |         0
##   [3]         3       28947 |         0
##   [4]         4       61198 |      1414
##   [5]         5       62280 |         0
##   -------
##   queryLength: 5 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 5 ranges and 2 metadata columns:
##       seqnames              ranges strand |        gene    distance
##          <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##   [1]        5       771901-772000      * |     ZDHHC11           0
##   [2]        8 143323601-143323700      * |     TSNARE1           0
##   [3]        2   16137301-16137400      * |  AC010145.4           0
##   [4]        X       284001-284100      * |   LINC00685        1414
##   [5]        X   71181401-71181500      * |       NHSL2           0
##   -------
##   seqinfo: 5 sequences from an unspecified genome; no seqlengths

WMBC

counts<-read.table("~/mr.edgeR.w.test.tsv.gz",sep="\t",header=TRUE,row.names=1)
mycol<-max(grep(".bam.counts",colnames(counts)))
counts<-counts[,1:mycol]
rownames(counts)<-paste(counts$chr,counts$start, counts$stop)
counts[,1:4]=NULL
colnames(counts)<-gsub(".bam.counts","",colnames(counts))

Natural Vs Ovarian stimulation

NAME = "WCB_natural_vs_ovarian_stimulation"
samplesheet<-subset(mdw, medical_help_to_conceive=="no" | ovarian_stimulation=="yes")
samplesheet$groups <- factor(samplesheet$ovarian_stimulation,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]
dim(y)
## [1] 4108   90
dim(samplesheet)
## [1] 90 19
colnames(y)
##  [1] "W1001" "W1002" "W1003" "W1004" "W1005" "W1006" "W1007" "W1008" "W1009"
## [10] "W1010" "W1021" "W1022" "W1023" "W1024" "W1025" "W1026" "W1027" "W1028"
## [19] "W1029" "W1030" "W1041" "W1042" "W1043" "W1044" "W1046" "W1047" "W1048"
## [28] "W1049" "W1050" "W1061" "W1062" "W1064" "W1065" "W1066" "W1067" "W1068"
## [37] "W1069" "W1070" "W1081" "W1082" "W1083" "W1084" "W1085" "W1086" "W1087"
## [46] "W1088" "W1089" "W1090" "W2001" "W2002" "W2003" "W2004" "W2005" "W2006"
## [55] "W2007" "W2009" "W2010" "W2021" "W2023" "W2024" "W2025" "W2026" "W2027"
## [64] "W2028" "W2029" "W2030" "W2042" "W2043" "W2046" "W2047" "W2048" "W2049"
## [73] "W2050" "W2051" "W2052" "W2053" "W2054" "W2055" "W2057" "W2058" "W2059"
## [82] "W2060" "W3031" "W3032" "W3033" "W3034" "W3035" "W3036" "W3037" "W3038"
rownames(samplesheet)[which(!rownames(samplesheet)%in%colnames(y))]
## character(0)
dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                          baseMean log2FoldChange      lfcSE      stat
## chr22 18235701 18235800  12.00608     -0.6233634 0.10407128 -5.989774
## chr22 18235601 18235700  10.96159     -0.5996109 0.10875599 -5.513359
## chr22 47374601 47374700  10.36687     -0.5790462 0.10907366 -5.308763
## chr2 127539601 127539700 12.98883     -0.4889375 0.09263947 -5.277852
## chr21 47679401 47679500   9.72660     -0.5877305 0.11175865 -5.258926
## chr9 130947601 130947700 14.63778     -0.5283858 0.10128306 -5.216921
## chr12 6155701 6155800    12.46589     -0.6123979 0.11822069 -5.180124
## chr15 42251201 42251300  16.42990     -0.4280639 0.08299098 -5.157958
## chr20 44494301 44494400  11.02396     -0.5246181 0.10173111 -5.156909
## chr19 45812201 45812300  10.46110     -0.5815381 0.11290401 -5.150730
##                                pvalue         padj
## chr22 18235701 18235800  2.101335e-09 8.632283e-06
## chr22 18235601 18235700  3.520483e-08 7.231071e-05
## chr22 47374601 47374700  1.103717e-07 9.257390e-05
## chr2 127539601 127539700 1.307067e-07 9.257390e-05
## chr21 47679401 47679500  1.448994e-07 9.257390e-05
## chr9 130947601 130947700 1.819214e-07 9.257390e-05
## chr12 6155701 6155800    2.217382e-07 9.257390e-05
## chr15 42251201 42251300  2.496583e-07 9.257390e-05
## chr20 44494301 44494400  2.510596e-07 9.257390e-05
## chr19 45812201 45812300  2.594741e-07 9.257390e-05
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","os",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw1",mx=y,groups=mygroups,n=15)

make_beeswarms_confects2(confects=confects,name="dmw1",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224, Un_gl000219, Un_gl000214, Un_gl000234, Un_gl000221
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 948 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       50305 |      4646
##     [2]         2       50558 |         0
##     [3]         3       21913 |      1391
##     [4]         4       50558 |        90
##     [5]         5       41643 |     66790
##     ...       ...         ... .       ...
##   [944]       944       36047 |         0
##   [945]       945       29932 |         0
##   [946]       946       21486 |         0
##   [947]       947       47173 |     45953
##   [948]       948        4139 |         0
##   -------
##   queryLength: 948 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 948 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]        7 148676101-148676200      * |          RNY3        4646
##     [2]        8     2523501-2523600      * | RP11-134O21.1           0
##     [3]       17     4834101-4834200      * |         GP1BA        1391
##     [4]        8     2523401-2523500      * | RP11-134O21.1          90
##     [5]        4 179923701-179923800      * | RP11-296L20.1       66790
##     ...      ...                 ...    ... .           ...         ...
##   [944]       22   47109601-47109700      * |          CERK           0
##   [945]        2   74285301-74285400      * |          TET3           0
##   [946]       16   83819801-83819900      * |         CDH13           0
##   [947]        6 141290001-141290100      * | RP11-471B18.1       45953
##   [948]        1 183028901-183029000      * |         LAMC1           0
##   -------
##   seqinfo: 28 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000225, 4_gl000194_random, Un_gl000232, Un_gl000224, 7_gl000195_random, Un_gl000231, Un_gl000237, Un_gl000222
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 1498 hits and 1 metadata column:
##          queryHits subjectHits |  distance
##          <integer>   <integer> | <integer>
##      [1]         1       34961 |         0
##      [2]         2       34961 |         0
##      [3]         3       36049 |         0
##      [4]         4       30899 |     79082
##      [5]         5       34847 |         0
##      ...       ...         ... .       ...
##   [1494]      1494       50554 |    108225
##   [1495]      1495         183 |         0
##   [1496]      1496         183 |         0
##   [1497]      1497         183 |         0
##   [1498]      1498         183 |         0
##   -------
##   queryLength: 1498 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 1498 ranges and 2 metadata columns:
##          seqnames              ranges strand |        gene    distance
##             <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##      [1]       22   18235701-18235800      * |         BID           0
##      [2]       22   18235601-18235700      * |         BID           0
##      [3]       22   47374601-47374700      * |    TBC1D22A           0
##      [4]        2 127539601-127539700      * |   RNU6-675P       79082
##      [5]       21   47679401-47679500      * |      MCM3AP           0
##      ...      ...                 ...    ... .         ...         ...
##   [1494]        8     2221701-2221800      * |       MYOM2      108225
##   [1495]        1     2602001-2602100      * |       TTC34           0
##   [1496]        1     2605501-2605600      * |       TTC34           0
##   [1497]        1     2595601-2595700      * |       TTC34           0
##   [1498]        1     2605601-2605700      * |       TTC34           0
##   -------
##   seqinfo: 32 sequences from an unspecified genome; no seqlengths

Natural Vs GIFT

NAME = "WBC_natural_vs_gamete_intrafallopian_transfer"
samplesheet<-subset(mdw, medical_help_to_conceive=="no" | gamete_intrafallopian_transfer=="yes")
samplesheet$groups <- factor(samplesheet$gamete_intrafallopian_transfer,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                          baseMean log2FoldChange     lfcSE     stat
## chr17 30853101 30853200  13.22975      1.1183982 0.2491031 4.489700
## chr22 39513801 39513900  12.20109      1.1663198 0.2698930 4.321415
## chr16 81857701 81857800  10.88815      1.1653577 0.2730882 4.267330
## chr22 42088101 42088200  16.67260      0.9505196 0.2306662 4.120758
## chr2 87592701 87592800   14.67661      1.0874182 0.2702461 4.023807
## chr19 36146601 36146700  15.90479      0.9714860 0.2453981 3.958817
## chr15 59042801 59042900  12.53708      1.0339995 0.2614560 3.954775
## chr6 42166101 42166200   14.90020      1.0262018 0.2621842 3.914049
## chr13 39579601 39579700  11.24815      1.1615895 0.3035079 3.827214
## chr7 100350401 100350500 26.34002      0.9250375 0.2463861 3.754422
##                                pvalue       padj
## chr17 30853101 30853200  7.132360e-06 0.02377211
## chr22 39513801 39513900  1.550320e-05 0.02377211
## chr16 81857701 81857800  1.978262e-05 0.02377211
## chr22 42088101 42088200  3.776273e-05 0.03403366
## chr2 87592701 87592800   5.726486e-05 0.03945240
## chr19 36146601 36146700  7.532200e-05 0.03945240
## chr15 59042801 59042900  7.660661e-05 0.03945240
## chr6 42166101 42166200   9.076136e-05 0.04089934
## chr13 39579601 39579700  1.296019e-04 0.05191278
## chr7 100350401 100350500 1.737418e-04 0.06263391
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","gift",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw2",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw2",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000217, Un_gl000214, Un_gl000239, Un_gl000219, 1_gl000192_random, Un_gl000232, Un_gl000224
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 373 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       22904 |         0
##     [2]         2       35821 |      2271
##     [3]         3       21471 |         0
##     [4]         4       35902 |         0
##     [5]         5       30161 |      3555
##     ...       ...         ... .       ...
##   [369]       369        4384 |         0
##   [370]       370       41127 |         0
##   [371]       371       34826 |         0
##   [372]       372        8834 |      7031
##   [373]       373       32642 |     23994
##   -------
##   queryLength: 373 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 373 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]       17   30853101-30853200      * | RP11-466A19.3           0
##     [2]       22   39513801-39513900      * |          CBX7        2271
##     [3]       16   81857701-81857800      * |       LRG_376           0
##     [4]       22   42088101-42088200      * |      C22orf46           0
##     [5]        2   87592701-87592800      * |    AC068279.1        3555
##     ...      ...                 ...    ... .           ...         ...
##   [369]        1 201884301-201884400      * |         LMOD1           0
##   [370]        4 140079701-140079800      * |          ELF2           0
##   [371]       21   47268101-47268200      * |         PCBP3           0
##   [372]       11   50220201-50220300      * | RP11-347H15.2        7031
##   [373]        2 237687001-237687100      * |    AC011286.1       23994
##   -------
##   seqinfo: 30 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000222, Un_gl000225
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 214 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       44872 |         0
##     [2]         2        7562 |         0
##     [3]         3       10994 |         0
##     [4]         4       38610 |         0
##     [5]         5       52670 |     19140
##     ...       ...         ... .       ...
##   [210]       210       53230 |     83198
##   [211]       211       33887 |         0
##   [212]       212        7596 |     28265
##   [213]       213       46533 |        54
##   [214]       214       32759 |     17117
##   -------
##   queryLength: 214 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 214 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]        6     9706701-9706800      * |         OFCC1           0
##     [2]       10 126694101-126694200      * |         CTBP2           0
##     [3]       12     2516401-2516500      * |       LRG_334           0
##     [4]        3 160863401-160863500      * |          NMD3           0
##     [5]        8 134637601-134637700      * |       SNORA40       19140
##     ...      ...                 ...    ... .           ...         ...
##   [210]        9   22562901-22563000      * |  RP11-399D6.2       83198
##   [211]       20   54056101-54056200      * | RP5-1010E17.1           0
##   [212]       10 129379201-129379300      * |           NPS       28265
##   [213]        6   92526201-92526300      * |  RP11-40G16.1          54
##   [214]        2 242466601-242466700      * |       BOK-AS1       17117
##   -------
##   seqinfo: 25 sequences from an unspecified genome; no seqlengths

Natural Vs ICSI fresh

NAME = "WCB_natural_vs_intracytoplasmic_sperm_injection_fresh_embryo"
samplesheet<-subset(mdw, medical_help_to_conceive=="no" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr8 78869401 78869500    13.179944     -0.8471167 0.1696395 -4.993628
## chrX 135606501 135606600  12.885803     -0.7877829 0.1635452 -4.816911
## chr7 98335401 98335500    13.854443      0.5724927 0.1199350  4.773360
## chr2 10578701 10578800    10.494121      0.6409630 0.1349830  4.748472
## chr22 18235701 18235800   12.620603     -0.7487072 0.1599923 -4.679645
## chr8 78869301 78869400    10.249391     -0.8413987 0.1813358 -4.640003
## chr18 21749701 21749800   12.871286      0.6268482 0.1371546  4.570377
## chr13 114887201 114887300 10.906562     -0.8023554 0.1764635 -4.546862
## chr11 111054201 111054300  9.970166      0.6464340 0.1427947  4.527017
## chr3 195352701 195352800   9.590132      0.6430379 0.1436293  4.477066
##                                 pvalue        padj
## chr8 78869401 78869500    5.925533e-07 0.002259678
## chrX 135606501 135606600  1.457975e-06 0.002259678
## chr7 98335401 98335500    1.811775e-06 0.002259678
## chr2 10578701 10578800    2.049595e-06 0.002259678
## chr22 18235701 18235800   2.873726e-06 0.002534626
## chr8 78869301 78869400    3.484039e-06 0.002560769
## chr18 21749701 21749800   4.868479e-06 0.002931289
## chr13 114887201 114887300 5.445166e-06 0.002931289
## chr11 111054201 111054300 5.982223e-06 0.002931289
## chr3 195352701 195352800  7.567572e-06 0.003337299
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","icsi.fh",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw3",mx=y,groups=mygroups,n=15)

make_beeswarms_confects2(confects=confects,name="dmw3",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000220, Un_gl000217, Un_gl000219, Un_gl000234, Un_gl000224, Un_gl000221, Un_gl000239, 17_gl000205_random, Un_gl000214, 1_gl000192_random
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 882 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       49302 |     56840
##     [2]         2       28871 |      1293
##     [3]         3       25091 |         0
##     [4]         4       10367 |     72406
##     [5]         5       39149 |      5596
##     ...       ...         ... .       ...
##   [878]       878       26292 |      6668
##   [879]       879       50872 |         0
##   [880]       880       43527 |         0
##   [881]       881       54702 |      8791
##   [882]       882        4335 |         0
##   -------
##   queryLength: 882 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 882 ranges and 2 metadata columns:
##         seqnames              ranges strand |        gene    distance
##            <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##     [1]        7   98335401-98335500      * |   RNU6-393P       56840
##     [2]        2   10578701-10578800      * |        ODC1        1293
##     [3]       18   21749701-21749800      * |     OSBPL1A           0
##     [4]       11 111054201-111054300      * |    C11orf53       72406
##     [5]        3 195352701-195352800      * |  AC069213.1        5596
##     ...      ...                 ...    ... .         ...         ...
##   [878]       19     9855901-9856000      * |      ZNF846        6668
##   [879]        8   15427201-15427300      * |       TUSC3           0
##   [880]        5 122179701-122179800      * |       SNX24           0
##   [881]        9 124556601-124556700      * |      DAB2IP        8791
##   [882]        1 200029301-200029400      * |       NR5A2           0
##   -------
##   seqinfo: 33 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000237, Un_gl000225, 7_gl000195_random, Un_gl000222, Un_gl000224, 4_gl000194_random
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 1212 hits and 1 metadata column:
##          queryHits subjectHits |  distance
##          <integer>   <integer> | <integer>
##      [1]         1       51878 |    190835
##      [2]         2       63206 |      7710
##      [3]         3       34961 |         0
##      [4]         4       51878 |    190935
##      [5]         5       14995 |         0
##      ...       ...         ... .       ...
##   [1208]      1208       14371 |         0
##   [1209]      1209       54273 |         0
##   [1210]      1210       34686 |         0
##   [1211]      1211       20536 |      2798
##   [1212]      1212       34576 |         0
##   -------
##   queryLength: 1212 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 1212 ranges and 2 metadata columns:
##          seqnames              ranges strand |         gene    distance
##             <Rle>           <IRanges>  <Rle> |  <character> <DataFrame>
##      [1]        8   78869401-78869500      * | RP11-91P17.1      190835
##      [2]        X 135606501-135606600      * |        VGLL1        7710
##      [3]       22   18235701-18235800      * |          BID           0
##      [4]        8   78869301-78869400      * | RP11-91P17.1      190935
##      [5]       13 114887201-114887300      * |        RASA3           0
##      ...      ...                 ...    ... .          ...         ...
##   [1208]       13   50622101-50622200      * |        DLEU2           0
##   [1209]        9   97539601-97539700      * |       C9orf3           0
##   [1210]       21   43714901-43715000      * |        ABCG1           0
##   [1211]       16   33376201-33376300      * | RP11-23E10.4        2798
##   [1212]       21   38110501-38110600      * |         SIM2           0
##   -------
##   seqinfo: 29 sequences from an unspecified genome; no seqlengths

Natural Vs ICSI frozen

NAME = "WCB_natural_vs_intracytoplasmic_sperm_injection_frozen_embryo"
samplesheet<-subset(mdw, medical_help_to_conceive=="no" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                           baseMean log2FoldChange     lfcSE      stat
## chr18 77380701 77380800  16.912354     -1.5075560 0.2767851 -5.446667
## chr18 77380801 77380900  20.698877     -1.0859208 0.2339465 -4.641748
## chr9 130628301 130628400 16.980399      0.6081491 0.1431800  4.247445
## chr1 32445901 32446000   11.079459      0.6990676 0.1661313  4.207922
## chr4 148771801 148771900 13.332171      0.6223662 0.1564437  3.978213
## chr15 96559701 96559800  12.501215     -0.9851054 0.2490842 -3.954909
## chr4 148771901 148772000  9.975663      0.7075575 0.1789136  3.954745
## chr5 1938501 1938600     13.686331     -0.8631993 0.2216432 -3.894544
## chr12 6041001 6041100    34.318833      0.8115905 0.2086039  3.890581
## chr12 6040901 6041000    32.872149      0.7488709 0.1925731  3.888762
##                                pvalue         padj
## chr18 77380701 77380800  5.132250e-08 0.0002268455
## chr18 77380801 77380900  3.454740e-06 0.0076349765
## chr9 130628301 130628400 2.162218e-05 0.0284791251
## chr1 32445901 32446000   2.577296e-05 0.0284791251
## chr4 148771801 148771900 6.943530e-05 0.0378028487
## chr15 96559701 96559800  7.656378e-05 0.0378028487
## chr4 148771901 148772000 7.661647e-05 0.0378028487
## chr5 1938501 1938600     9.838352e-05 0.0378028487
## chr12 6041001 6041100    1.000044e-04 0.0378028487
## chr12 6040901 6041000    1.007568e-04 0.0378028487
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","nat",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw4",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw4",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
ol_up
## Hits object with 525 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       54852 |       358
##     [2]         2        1049 |     33429
##     [3]         3       41259 |         0
##     [4]         4       41259 |         0
##     [5]         5       11072 |         0
##     ...       ...         ... .       ...
##   [521]       521       48171 |         0
##   [522]       522       32265 |         0
##   [523]       523       31854 |     43212
##   [524]       524       29059 |         0
##   [525]       525       13707 |     17114
##   -------
##   queryLength: 525 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 525 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]        9 130628301-130628400      * |           AK1         358
##     [2]        1   32445901-32446000      * |       KHDRBS1       33429
##     [3]        4 148771801-148771900      * |      ARHGAP10           0
##     [4]        4 148771901-148772000      * |      ARHGAP10           0
##     [5]       12     6041001-6041100      * |          ANO2           0
##     ...      ...                 ...    ... .           ...         ...
##   [521]        7   32183401-32183500      * |         PDE1C           0
##   [522]        2 218377601-218377700      * |         DIRC3           0
##   [523]        2 194936101-194936200      * |    AC068135.1       43212
##   [524]        2   24421101-24421200      * |       FAM228A           0
##   [525]       12 131814801-131814900      * | RP13-507P19.2       17114
##   -------
##   seqinfo: 24 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224, 7_gl000195_random, Un_gl000231, Un_gl000225, Un_gl000232, Un_gl000217, Un_gl000219
##   - in 'y': GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 462 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       25758 |      8397
##     [2]         2       25758 |      8497
##     [3]         3       19216 |     11517
##     [4]         4       41892 |         0
##     [5]         5       52941 |         0
##     ...       ...         ... .       ...
##   [458]       458       48780 |         0
##   [459]       459       32893 |     23928
##   [460]       460       33176 |      2994
##   [461]       461       23930 |         0
##   [462]       462       13310 |         0
##   -------
##   queryLength: 462 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 462 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]       18   77380701-77380800      * | RP11-567M16.2        8397
##     [2]       18   77380801-77380900      * | RP11-567M16.2        8497
##     [3]       15   96559701-96559800      * |    RP11-4G2.1       11517
##     [4]        5     1938501-1938600      * |  RP11-259O2.1           0
##     [5]        9     2129201-2129300      * |       SMARCA2           0
##     ...      ...                 ...    ... .           ...         ...
##   [458]        7   65492201-65492300      * | RP5-1132H15.3           0
##   [459]       20     3412201-3412300      * |     C20orf194       23928
##   [460]       20   21485601-21485700      * |       GSTM3P1        2994
##   [461]       17   58298201-58298300      * |         USP32           0
##   [462]       12 113336401-113336500      * |         RPH3A           0
##   -------
##   seqinfo: 31 sequences from an unspecified genome; no seqlengths

ICSI fresh Vs ICSI frozen

NAME = "WCB_intracytoplasmic_sperm_injection_fresh_embryo_vs_intracytoplasmic_sperm_injection_frozen_embryo"
samplesheet<-subset(mdw, intracytoplasmic_sperm_injection=="yes")
samplesheet$groups <- factor(samplesheet$frozen_embryo ,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                               baseMean log2FoldChange     lfcSE      stat
## chr1 17055401 17055500       55.494655      0.5291444 0.1439927  3.674799
## chrUn_gl000224 141501 141600 13.187467     -1.2710092 0.3627306 -3.504003
## chrUn_gl000224 136501 136600 11.591425     -1.2439351 0.3552201 -3.501872
## chr4 140079701 140079800     31.036529      0.6521172 0.1878481  3.471514
## chr4 162212101 162212200     12.649396     -1.3347748 0.3846272 -3.470308
## chr4 162212201 162212300     10.421576     -1.4468827 0.4249407 -3.404905
## chr5 53347501 53347600        9.728852     -1.0395377 0.3075009 -3.380601
## chr1 121434201 121434300     43.657565     -0.5850088 0.1749638 -3.343598
## chrUn_gl000217 86301 86400   13.445165     -0.9223583 0.2758936 -3.343166
## chrUn_gl000224 129701 129800 11.026800     -1.2770373 0.3834820 -3.330110
##                                    pvalue      padj
## chr1 17055401 17055500       0.0002380365 0.3826655
## chrUn_gl000224 141501 141600 0.0004583194 0.3826655
## chrUn_gl000224 136501 136600 0.0004620020 0.3826655
## chr4 140079701 140079800     0.0005175318 0.3826655
## chr4 162212101 162212200     0.0005198624 0.3826655
## chr4 162212201 162212300     0.0006618705 0.3826655
## chr5 53347501 53347600       0.0007232753 0.3826655
## chr1 121434201 121434300     0.0008269942 0.3826655
## chrUn_gl000217 86301 86400   0.0008282820 0.3826655
## chrUn_gl000224 129701 129800 0.0008681160 0.3826655
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","icsi.fh",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw5",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw5",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
ol_up
## Hits object with 133 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1         567 |      4946
##     [2]         2       41127 |         0
##     [3]         3       33781 |      8096
##     [4]         4        5721 |     22829
##     [5]         5        1049 |     33429
##     ...       ...         ... .       ...
##   [129]       129       28852 |         0
##   [130]       130       37705 |     15124
##   [131]       131       32637 |         0
##   [132]       132       63608 |    730089
##   [133]       133       27856 |         0
##   -------
##   queryLength: 133 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 133 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]        1   17055401-17055500      * |       FAM231C        4946
##     [2]        4 140079701-140079800      * |          ELF2           0
##     [3]       20   47813001-47813100      * |         STAU1        8096
##     [4]       10   19206501-19206600      * | RP11-288D15.1       22829
##     [5]        1   32445901-32446000      * |       KHDRBS1       33429
##     ...      ...                 ...    ... .           ...         ...
##   [129]        2     9997801-9997900      * |         TAF1B           0
##   [130]        3 109231701-109231800      * |    AC092905.1       15124
##   [131]        2 237415101-237415200      * |         IQCA1           0
##   [132]        Y     1922601-1922700      * |    RNU6-1334P      730089
##   [133]       19   47273401-47273500      * |          FKRP           0
##   -------
##   seqinfo: 24 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224, Un_gl000217, Un_gl000219, Un_gl000220
##   - in 'y': 15, 21, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 137 hits and 1 metadata column:
##         queryHits subjectHits |  distance
##         <integer>   <integer> | <integer>
##     [1]         1       41450 |     87904
##     [2]         2       41450 |     87804
##     [3]         3       42518 |         0
##     [4]         4        2811 |    111878
##     [5]         5       42382 |         0
##     ...       ...         ... .       ...
##   [133]       133       50537 |         0
##   [134]       134       39153 |         0
##   [135]       135       33121 |      1813
##   [136]       136       44587 |         0
##   [137]       137       54120 |     35242
##   -------
##   queryLength: 137 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 137 ranges and 2 metadata columns:
##         seqnames              ranges strand |          gene    distance
##            <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##     [1]        4 162212101-162212200      * |  RP11-234O6.2       87904
##     [2]        4 162212201-162212300      * |  RP11-234O6.2       87804
##     [3]        5   53347501-53347600      * |         ARL15           0
##     [4]        1 121434201-121434300      * | RP11-344P13.1      111878
##     [5]        5   39392601-39392700      * |          DAB2           0
##     ...      ...                 ...    ... .           ...         ...
##   [133]        8     1085901-1086000      * | CTD-2281E23.2           0
##   [134]        3 195425401-195425500      * |     LINC00969           0
##   [135]       20   18479701-18479800      * |         RBBP9        1813
##   [136]        5 177996301-177996400      * |       COL23A1           0
##   [137]        9   90076801-90076900      * |         DAPK1       35242
##   -------
##   seqinfo: 24 sequences from an unspecified genome; no seqlengths

ovarian stimulation vs GIFT

NAME = "WCB_ovarian_stimulation_vs_gamete_intrafallopian_transfer"
samplesheet<-subset(mdw, ovarian_stimulation=="yes" | gamete_intrafallopian_transfer=="yes")
samplesheet$groups <- factor(samplesheet$gamete_intrafallopian_transfer,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr22 16693101 16693200    9.980976      1.1339874 0.2952526  3.840736
## chr17 30853101 30853200   14.995698      0.9878678 0.2604167  3.793412
## chr12 117110101 117110200  9.921964      1.1579951 0.3411362  3.394525
## chr7 16438401 16438500    11.458974     -2.3288024 0.6931108 -3.359928
## chr16 83819801 83819900   16.553392      0.8825193 0.2795569  3.156850
## chr17 76414601 76414700   13.378902     -1.5315581 0.4904426 -3.122808
## chr19 23506701 23506800   15.842338      0.8930220 0.2866894  3.114946
## chr9 15085901 15086000    12.815366      0.8945127 0.2895910  3.088882
## chr5 180085401 180085500  17.119114      1.7378892 0.5747113  3.023934
## chr5 180085501 180085600  15.107439      1.9483332 0.6498883  2.997951
##                                 pvalue      padj
## chr22 16693101 16693200   0.0001226660 0.2920558
## chr17 30853101 30853200   0.0001485911 0.2920558
## chr12 117110101 117110200 0.0006874785 0.7661794
## chr7 16438401 16438500    0.0007796280 0.7661794
## chr16 83819801 83819900   0.0015948332 0.8479443
## chr17 76414601 76414700   0.0017913442 0.8479443
## chr19 23506701 23506800   0.0018397862 0.8479443
## chr9 15085901 15086000    0.0020091088 0.8479443
## chr5 180085401 180085500  0.0024951078 0.8479443
## chr5 180085501 180085600  0.0027180141 0.8479443
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","os",mygroups)
mygroups<-gsub("yes","gift",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw6",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw6",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000214
##   - in 'y': 3, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 84 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       34896 |      5855
##    [2]         2       22904 |         0
##    [3]         3       13383 |         0
##    [4]         4       21486 |         0
##    [5]         5       27026 |         0
##    ...       ...         ... .       ...
##   [80]        80       33717 |       186
##   [81]        81       34826 |         0
##   [82]        82       42796 |         0
##   [83]        83       41747 |         0
##   [84]        84       35340 |         0
##   -------
##   queryLength: 84 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 84 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]       22   16693101-16693200      * |  LA16c-13E4.3        5855
##    [2]       17   30853101-30853200      * | RP11-466A19.3           0
##    [3]       12 117110101-117110200      * | RP11-497G19.1           0
##    [4]       16   83819801-83819900      * |         CDH13           0
##    [5]       19   23506701-23506800      * |  CTB-176F20.3           0
##    ...      ...                 ...    ... .           ...         ...
##   [80]       20   44604901-44605000      * |         FTLP1         186
##   [81]       21   47268001-47268100      * |         PCBP3           0
##   [82]        5   70387601-70387700      * |  RP11-195E2.1           0
##   [83]        4 187036301-187036400      * |       FAM149A           0
##   [84]       22   24234701-24234800      * |   AP000350.10           0
##   -------
##   seqinfo: 24 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
ol_down
## Hits object with 60 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       47898 |         0
##    [2]         2       24444 |         0
##    [3]         3        5396 |       721
##    [4]         4       44587 |         0
##    [5]         5       24800 |         0
##    ...       ...         ... .       ...
##   [56]        56       30730 |         0
##   [57]        57       37635 |         0
##   [58]        58       47870 |      5090
##   [59]        59       45036 |     41241
##   [60]        60         568 |         0
##   -------
##   queryLength: 60 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 60 ranges and 2 metadata columns:
##        seqnames              ranges strand |         gene    distance
##           <Rle>           <IRanges>  <Rle> |  <character> <DataFrame>
##    [1]        7   16438401-16438500      * |         ISPD           0
##    [2]       17   76414601-76414700      * |         PGS1           0
##    [3]        1 248736201-248736300      * |       OR2T34         721
##    [4]        5 177996501-177996600      * |      COL23A1           0
##    [5]       18     7688101-7688200      * |        PTPRM           0
##    ...      ...                 ...    ... .          ...         ...
##   [56]        2 113356001-113356100      * |   AC012442.5           0
##   [57]        3 101913201-101913300      * |        ZPLD1           0
##   [58]        7   12699701-12699800      * |   AC011891.5        5090
##   [59]        6   21313301-21313400      * | RP1-135L22.1       41241
##   [60]        1   17123001-17123100      * |        CROCC           0
##   -------
##   seqinfo: 18 sequences from an unspecified genome; no seqlengths

ovarian stimulation vs ICSI fresh

NAME = "WCB_ovarian_stimulation_vs_intracytoplasmic_sperm_injection_fresh_embryo"
samplesheet<-subset(mdw, ovarian_stimulation=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr22 18137301 18137400    9.779628      0.7210510 0.1891315  3.812431
## chr20 46505901 46506000   12.066651     -0.5706467 0.1608996 -3.546600
## chr20 46506001 46506100   12.644342     -0.5613458 0.1588854 -3.533023
## chr6 5254301 5254400      16.185867     -0.5476619 0.1588228 -3.448257
## chr11 114719601 114719700 12.652462     -0.5282294 0.1603397 -3.294440
## chr1 568201 568300        54.388852     -0.3727779 0.1181326 -3.155588
## chr7 98335401 98335500    15.787574      0.4398273 0.1440408  3.053492
## chr10 53786501 53786600   10.832078      0.5772439 0.1911099  3.020482
## chr13 82443601 82443700   12.732790     -0.4574634 0.1574567 -2.905328
## chr9 4118801 4118900      12.005536      0.5094682 0.1766290  2.884398
##                                 pvalue      padj
## chr22 18137301 18137400   0.0001376064 0.5484674
## chr20 46505901 46506000   0.0003902360 0.5484674
## chr20 46506001 46506100   0.0004108370 0.5484674
## chr6 5254301 5254400      0.0005642173 0.5649225
## chr11 114719601 114719700 0.0009861790 0.7899294
## chr1 568201 568300        0.0016017491 0.9994297
## chr7 98335401 98335500    0.0022619494 0.9994297
## chr10 53786501 53786600   0.0025237290 0.9994297
## chr13 82443601 82443700   0.0036686829 0.9994297
## chr9 4118801 4118900      0.0039216271 0.9994297
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","os",mygroups)
mygroups<-gsub("yes","icsi.fh",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw7",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw7",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000225
##   - in 'y': 21, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 70 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       34959 |         0
##    [2]         2       49302 |     56840
##    [3]         3        6329 |         0
##    [4]         4       52962 |         0
##    [5]         5       22912 |         0
##    ...       ...         ... .       ...
##   [66]        66        6799 |         0
##   [67]        67       33419 |         0
##   [68]        68       32671 |      1130
##   [69]        69       10492 |    178091
##   [70]        70       48255 |         0
##   -------
##   queryLength: 70 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 70 ranges and 2 metadata columns:
##        seqnames              ranges strand |        gene    distance
##           <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##    [1]       22   18137301-18137400      * |     BCL2L13           0
##    [2]        7   98335401-98335500      * |   RNU6-393P       56840
##    [3]       10   53786501-53786600      * |       PRKG1           0
##    [4]        9     4118801-4118900      * |       GLIS3           0
##    [5]       17   32115801-32115900      * |       ASIC2           0
##    ...      ...                 ...    ... .         ...         ...
##   [66]       10   84641801-84641900      * |        NRG3           0
##   [67]       20   33182601-33182700      * |        PIGU           0
##   [68]        2 239113501-239113600      * |       ILKAP        1130
##   [69]       11 116000301-116000400      * |  AP000797.2      178091
##   [70]        7   37484301-37484400      * |       ELMO1           0
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000219, 7_gl000195_random, Un_gl000224
##   - in 'y': 12, 16, 21, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 64 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       33765 |      3883
##    [2]         2       33765 |      3983
##    [3]         3       44812 |         0
##    [4]         4       10475 |    140238
##    [5]         5          38 |         0
##    ...       ...         ... .       ...
##   [60]        60       45693 |         0
##   [61]        61       39662 |         0
##   [62]        62        1873 |         0
##   [63]        63       37718 |         0
##   [64]        64       26150 |         0
##   -------
##   queryLength: 64 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 64 ranges and 2 metadata columns:
##        seqnames              ranges strand |         gene    distance
##           <Rle>           <IRanges>  <Rle> |  <character> <DataFrame>
##    [1]       20   46505901-46506000      * |    RNA5SP486        3883
##    [2]       20   46506001-46506100      * |    RNA5SP486        3983
##    [3]        6     5254301-5254400      * |        LYRM4           0
##    [4]       11 114719601-114719700      * |        NXPE2      140238
##    [5]        1       568201-568300      * | RP5-857K21.7           0
##    ...      ...                 ...    ... .          ...         ...
##   [60]        6   34729201-34729300      * |        SNRPC           0
##   [61]        4   22505901-22506000      * |       GPR125           0
##   [62]        1   65688901-65689000      * |          AK4           0
##   [63]        3 111279801-111279900      * |         CD96           0
##   [64]       19     6919601-6919700      * |         EMR1           0
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths

ovarian stimulation vs ICSI frozen

NAME = "WCB_ovarian_stimulation_vs_gamete_intrafallopian_transfer"
samplesheet<-subset(mdw, ovarian_stimulation=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr22 18137301 18137400    9.672049      0.7234614 0.1894935  3.817869
## chr20 46506001 46506100   12.737820     -0.5606210 0.1617836 -3.465253
## chr10 53786501 53786600   10.950650      0.6281321 0.1829794  3.432803
## chr20 46505901 46506000   12.181244     -0.5624272 0.1639817 -3.429817
## chr1 568201 568300        54.385106     -0.3970362 0.1209857 -3.281678
## chr6 5254301 5254400      16.457519     -0.5198147 0.1612261 -3.224135
## chr11 114719601 114719700 12.748471     -0.5275933 0.1648446 -3.200550
## chr21 41496201 41496300    9.969344      0.6933557 0.2189795  3.166304
## chr2 203793501 203793600  11.672169      0.5324327 0.1709478  3.114592
## chr12 33515801 33515900   10.072216      0.5100111 0.1759576  2.898488
##                                 pvalue      padj
## chr22 18137301 18137400   0.0001346092 0.5370906
## chr20 46506001 46506100   0.0005297330 0.6024781
## chr10 53786501 53786600   0.0005973764 0.6024781
## chr20 46505901 46506000   0.0006039881 0.6024781
## chr1 568201 568300        0.0010319121 0.7700175
## chr6 5254301 5254400      0.0012635393 0.7700175
## chr11 114719601 114719700 0.0013716580 0.7700175
## chr21 41496201 41496300   0.0015438947 0.7700175
## chr2 203793501 203793600  0.0018419929 0.8166169
## chr12 33515801 33515900   0.0037496652 0.9942509
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","os",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw8",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw8",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000225
##   - in 'y': 15, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 76 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       34959 |         0
##    [2]         2        6329 |         0
##    [3]         3       34651 |         0
##    [4]         4       32035 |         0
##    [5]         5       11716 |       210
##    ...       ...         ... .       ...
##   [72]        72       48741 |         0
##   [73]        73       39565 |         0
##   [74]        74       25091 |         0
##   [75]        75       50846 |     15235
##   [76]        76       34696 |         0
##   -------
##   queryLength: 76 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 76 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]       22   18137301-18137400      * |       BCL2L13           0
##    [2]       10   53786501-53786600      * |         PRKG1           0
##    [3]       21   41496201-41496300      * |         DSCAM           0
##    [4]        2 203793501-203793600      * |          CARF           0
##    [5]       12   33515801-33515900      * |      SNORD112         210
##    ...      ...                 ...    ... .           ...         ...
##   [72]        7   64362201-64362300      * |        ZNF273           0
##   [73]        4   11781301-11781400      * | RP11-281P23.2           0
##   [74]       18   21749701-21749800      * |       OSBPL1A           0
##   [75]        8   12917201-12917300      * |     RNU6-842P       15235
##   [76]       21   43953501-43953600      * |       SLC37A1           0
##   -------
##   seqinfo: 22 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000219, 7_gl000195_random, Un_gl000224, Un_gl000232
##   - in 'y': 12, 16, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 63 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       33765 |      3983
##    [2]         2       33765 |      3883
##    [3]         3          38 |         0
##    [4]         4       44812 |         0
##    [5]         5       10475 |    140238
##    ...       ...         ... .       ...
##   [59]        59       32311 |         0
##   [60]        60        5508 |     21253
##   [61]        61       15894 |         0
##   [62]        62       29731 |         0
##   [63]        63       19322 |         0
##   -------
##   queryLength: 63 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 63 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]       20   46506001-46506100      * |     RNA5SP486        3983
##    [2]       20   46505901-46506000      * |     RNA5SP486        3883
##    [3]        1       568201-568300      * |  RP5-857K21.7           0
##    [4]        6     5254301-5254400      * |         LYRM4           0
##    [5]       11 114719601-114719700      * |         NXPE2      140238
##    ...      ...                 ...    ... .           ...         ...
##   [59]        2 219783901-219784000      * |   AC073128.10           0
##   [60]       10     5615601-5615700      * | RP13-463N16.6       21253
##   [61]       14   53764001-53764100      * |    AL163953.3           0
##   [62]        2   64944101-64944200      * |       SERTAD2           0
##   [63]       15 101771701-101771800      * |         CHSY1           0
##   -------
##   seqinfo: 24 sequences from an unspecified genome; no seqlengths

GIFT vs ICSI fresh

NAME = "WCB_gamete_intrafallopian_transfer_vs_intracytoplasmic_sperm_injection_fresh_embryo"
samplesheet<-subset(mdw, gamete_intrafallopian_transfer=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="no"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chr5 180085501 180085600  16.470312      -2.399379 0.5171319 -4.639781
## chr5 180085401 180085500  18.252841      -2.167830 0.4786014 -4.529512
## chr16 83819801 83819900   14.489352      -1.311364 0.3730819 -3.514950
## chr11 131925601 131925700 13.758961      -1.361866 0.3933131 -3.462549
## chr5 54244201 54244300     9.764983       2.871821 0.9020866  3.183531
## chr5 177996501 177996600  14.169417       1.715696 0.5609513  3.058547
## chr5 80451501 80451600     9.780591       4.447250 1.4588228  3.048519
## chr5 69301501 69301600    11.711080      -1.235108 0.4075678 -3.030436
## chr22 16693101 16693200   10.871257      -1.196010 0.3953796 -3.024967
## chr7 16438401 16438500    11.850219       2.256864 0.7481360  3.016649
##                                 pvalue       padj
## chr5 180085501 180085600  3.487788e-06 0.01235021
## chr5 180085401 180085500  5.912022e-06 0.01235021
## chr16 83819801 83819900   4.398368e-04 0.55889594
## chr11 131925601 131925700 5.350847e-04 0.55889594
## chr5 54244201 54244300    1.454904e-03 0.99972323
## chr5 177996501 177996600  2.224133e-03 0.99972323
## chr5 80451501 80451600    2.299721e-03 0.99972323
## chr5 69301501 69301600    2.442008e-03 0.99972323
## chr22 16693101 16693200   2.486600e-03 0.99972323
## chr7 16438401 16438500    2.555859e-03 0.99972323
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","gift",mygroups)
mygroups<-gsub("yes","icsi.fh",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw9",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw9",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000239
##   - in 'y': 10, 13, 21, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X, Y
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_up
## Hits object with 62 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       42531 |      7762
##    [2]         2       44587 |         0
##    [3]         3       43016 |         0
##    [4]         4       47898 |         0
##    [5]         5       24444 |         0
##    ...       ...         ... .       ...
##   [58]        58       39150 |       260
##   [59]        59       47518 |         0
##   [60]        60       48859 |         0
##   [61]        61       49699 |     33787
##   [62]        62       28771 |         0
##   -------
##   queryLength: 62 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 62 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]        5   54244201-54244300      * |  RP11-45H22.3        7762
##    [2]        5 177996501-177996600      * |       COL23A1           0
##    [3]        5   80451501-80451600      * |       RASGRF2           0
##    [4]        7   16438401-16438500      * |          ISPD           0
##    [5]       17   76414601-76414700      * |          PGS1           0
##    ...      ...                 ...    ... .           ...         ...
##   [58]        3 195366501-195366600      * |  RP11-141C7.4         260
##   [59]        6 165877101-165877200      * |        PDE10A           0
##   [60]        7   70182301-70182400      * |         AUTS2           0
##   [61]        7 118625101-118625200      * | RP11-533K11.1       33787
##   [62]        2     3749001-3749100      * |          ALLC           0
##   -------
##   seqinfo: 20 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000224, Un_gl000214, Un_gl000219
##   - in 'y': 20, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 74 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       44657 |      8876
##    [2]         2       44657 |      8776
##    [3]         3       21486 |         0
##    [4]         4       10899 |         0
##    [5]         5       42771 |     19473
##    ...       ...         ... .       ...
##   [70]        70       35902 |         0
##   [71]        71        7019 |      8260
##   [72]        72       50510 |         0
##   [73]        73       21438 |         0
##   [74]        74        9617 |     25208
##   -------
##   queryLength: 74 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 74 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]        5 180085501-180085600      * |          FLT4        8876
##    [2]        5 180085401-180085500      * |          FLT4        8776
##    [3]       16   83819801-83819900      * |         CDH13           0
##    [4]       11 131925601-131925700      * |           NTM           0
##    [5]        5   69301501-69301600      * |        SERF1B       19473
##    ...      ...                 ...    ... .           ...         ...
##   [70]       22   42088101-42088200      * |      C22orf46           0
##   [71]       10   96153901-96154000      * |       TBC1D12        8260
##   [72]        7 158743201-158743300      * |         WDR60           0
##   [73]       16   80473501-80473600      * | RP11-525K10.3           0
##   [74]       11   69659001-69659100      * |          FGF3       25208
##   -------
##   seqinfo: 25 sequences from an unspecified genome; no seqlengths

GIFT vs ICSI frozen

NAME = "WCB_gamete_intrafallopian_transfer_vs_intracytoplasmic_sperm_injection_frozen_embryo"
samplesheet<-subset(mdw, gamete_intrafallopian_transfer=="yes" | (intracytoplasmic_sperm_injection=="yes"&frozen_embryo=="yes"))
samplesheet$groups <- factor(samplesheet$intracytoplasmic_sperm_injection,levels=c("no","yes"))
samplesheet$sex <- factor(samplesheet$gender,levels=c("male","female"))
counts_f<-counts[,colnames(counts) %in% rownames(samplesheet)]

# filter genes with fewer than 10 reads per sample
y <- counts_f[which(rowSums(counts_f)/ncol(counts_f)>=(10)),]
y <- y[,order(colnames(y))]
samplesheet <- samplesheet[order(rownames(samplesheet)),]

dds <- DESeqDataSetFromMatrix(countData = y , colData = samplesheet, design = ~ sex + groups )
res <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
z <- results(res)
vsd <- vst(dds, blind=FALSE)
zz <-cbind(as.data.frame(z),assay(vsd))
dge <- as.data.frame(zz[order(zz$pvalue),])
dge[1:10,1:6]
##                            baseMean log2FoldChange     lfcSE      stat
## chrY 2144901 2145000      10.676552     -2.0128241 0.6172267 -3.261078
## chr2 73946601 73946700    10.001901      2.1337142 0.7309475  2.919107
## chr22 16693101 16693200   12.265737     -1.2467815 0.4880763 -2.554481
## chr17 1062401 1062500     30.321491     -0.9421519 0.3764385 -2.502805
## chr13 110221501 110221600  9.911565      1.6834761 0.6743905  2.496293
## chr2 73946701 73946800    10.396826      1.5514510 0.6450579  2.405134
## chr2 73946801 73946900     9.557529      1.6012139 0.6706665  2.387496
## chr2 3749101 3749200      11.951058      1.4677959 0.6299287  2.330099
## chr6 24538901 24539000    10.081781      1.4884713 0.6444221  2.309777
## chr18 77380801 77380900   14.570227     -1.1827601 0.5183389 -2.281828
##                                pvalue      padj
## chrY 2144901 2145000      0.001109896 0.9997649
## chr2 73946601 73946700    0.003510355 0.9997649
## chr22 16693101 16693200   0.010634634 0.9997649
## chr17 1062401 1062500     0.012321354 0.9997649
## chr13 110221501 110221600 0.012549896 0.9997649
## chr2 73946701 73946800    0.016166521 0.9997649
## chr2 73946801 73946900    0.016963575 0.9997649
## chr2 3749101 3749200      0.019800937 0.9997649
## chr6 24538901 24539000    0.020900504 0.9997649
## chr18 77380801 77380900   0.022499500 0.9997649
sig <- subset(dge,padj<0.05)
SIG = nrow(sig)
DN = nrow(subset(sig,log2FoldChange<0))
UP = nrow(subset(sig,log2FoldChange>0))
HEADER = paste(NAME, SIG , "DMRs,", UP ,"hypermethylated,", DN, "hypomethylated")


#volcano plot
plot(dge$log2FoldChange,-log10(dge$pvalue),cex=0.6,cex.axis=1.2,cex.lab=1.3, 
xlab="log2 fold change",
ylab="log10 pvalue"
,pch=19,col="#838383")
points(sig$log2FoldChange,-log10(sig$pvalue),cex=0.6,pch=19,col="red")
mtext((HEADER),cex=1.0)

#heatmap chart
mygroups<-as.numeric(samplesheet$groups)
colCols<-gsub("1","yellow",mygroups)
colCols<-gsub("2","orange",colCols)
my_palette <- colorRampPalette(c("blue", "white", "red"))(n = 25)
dgem<-as.matrix(dge[1:50,7:ncol(dge)])
heatmap.2(dgem,trace="none",scale="row",margin=c(5,5),col=my_palette,cexRow = 0.5,cexCol = 0.4,ColSideColors =colCols)

#beeswarm charts
confects <- deseq2_confects(res)
mygroups<-samplesheet$groups
mygroups<-gsub("no","gift",mygroups)
mygroups<-gsub("yes","icsi.fz",mygroups)
mygroups<-factor(mygroups)
make_beeswarm2(dm=dge,name="dmw10",mx=y,groups=mygroups,n=15)
make_beeswarms_confects2(confects=confects,name="dmw10",mx=y,groups=mygroups,n=15)

## Annotation
dge$chr<-sapply(strsplit(rownames(dge)," "),"[[",1)
dge$chr<-gsub("chr","",dge$chr)
dge$start<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",2))
dge$end<-as.integer(sapply(strsplit(rownames(dge)," "),"[[",3))

sig<-subset(dge,pvalue<.05)
sig_down<-subset(sig,log2FoldChange<0)
sig_up<-subset(sig,log2FoldChange>0)

gr_up<-GRanges(seqnames=sig_up$chr,
ranges=IRanges(start = sig_up$start , end = sig_up$end)) 
gr_up <- gr_up[which(seqnames(gr_up) %in% seqnames(ensgenes)),]
ol_up<-distanceToNearest(gr_up, ensgenes)
ol_up
## Hits object with 19 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       29920 |      9530
##    [2]         2       14912 |    159028
##    [3]         3       29920 |      9430
##    [4]         4       29920 |      9330
##    [5]         5       28771 |         0
##    ...       ...         ... .       ...
##   [15]        15       14275 |      1365
##   [16]        16       24444 |         0
##   [17]        17       45715 |         0
##   [18]        18        5135 |         0
##   [19]        19        5674 |       747
##   -------
##   queryLength: 19 / subjectLength: 64102
gr_up$gene <- ensgenes[subjectHits(ol_up),"symbol"]$symbol
gr_up$distance <- elementMetadata(ol_up)
gr_up
## GRanges object with 19 ranges and 2 metadata columns:
##        seqnames              ranges strand |        gene    distance
##           <Rle>           <IRanges>  <Rle> | <character> <DataFrame>
##    [1]        2   73946601-73946700      * |       TPRKB        9530
##    [2]       13 110221501-110221600      * |   LINC00676      159028
##    [3]        2   73946701-73946800      * |       TPRKB        9430
##    [4]        2   73946801-73946900      * |       TPRKB        9330
##    [5]        2     3749101-3749200      * |        ALLC           0
##    ...      ...                 ...    ... .         ...         ...
##   [15]       13   45875601-45875700      * |  AL138963.1        1365
##   [16]       17   76414601-76414700      * |        PGS1           0
##   [17]        6   35546901-35547000      * |       FKBP5           0
##   [18]        1 236330101-236330200      * |     GPR137B           0
##   [19]       10   15555101-15555200      * |       ITGA8         747
##   -------
##   seqinfo: 10 sequences from an unspecified genome; no seqlengths
gr_down<-GRanges(seqnames=sig_down$chr,
ranges=IRanges(start = sig_down$start , end = sig_down$end))
gr_down <- gr_down[which(seqnames(gr_down) %in% seqnames(ensgenes)),]
ol_down<-distanceToNearest(gr_down, ensgenes)
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
##   - in 'x': Un_gl000217, Un_gl000214, Un_gl000219, Un_gl000224
##   - in 'y': 10, 11, 12, 13, 14, 15, 19, 4, 6, 7, 8, GL000191.1, GL000192.1, GL000193.1, GL000194.1, GL000195.1, GL000196.1, GL000199.1, GL000201.1, GL000204.1, GL000205.1, GL000209.1, GL000211.1, GL000212.1, GL000213.1, GL000215.1, GL000216.1, GL000218.1, GL000219.1, GL000220.1, GL000221.1, GL000222.1, GL000223.1, GL000224.1, GL000225.1, GL000228.1, GL000229.1, GL000230.1, GL000231.1, GL000233.1, GL000236.1, GL000237.1, GL000240.1, GL000241.1, GL000242.1, GL000243.1, GL000247.1, HG1007_PATCH, HG1032_PATCH, HG104_HG975_PATCH, HG1063_PATCH, HG1074_PATCH, HG1079_PATCH, HG1082_HG167_PATCH, HG1091_PATCH, HG1133_PATCH, HG1146_PATCH, HG115_PATCH, HG1208_PATCH, HG1211_PATCH, HG122_PATCH, HG1257_PATCH, HG1287_PATCH, HG1292_PATCH, HG1293_PATCH, HG1304_PATCH, HG1308_PATCH, HG1322_PATCH, HG1350_HG959_PATCH, HG1423_PATCH, HG1424_PATCH, HG1425_PATCH, HG1426_PATCH, HG142_HG150_NOVEL_TEST, HG1433_PATCH, HG1434_PATCH, HG1435_PATCH, HG1436_HG1432_PATCH, HG1437_PATCH, HG1438_PATCH, HG1439_PATCH, HG1440_PATCH, HG1441_PATCH, HG1442_PATCH, HG1443_HG1444_PATCH, HG144_PATCH, HG1453_PATCH, HG1458_PATCH, HG1459_PATCH, HG1462_PATCH, HG1463_PATCH, HG1472_PATCH, HG1479_PATCH, HG1486_PATCH, HG1487_PATCH, HG1488_PATCH, HG1490_PATCH, HG1497_PATCH, HG14_PATCH, HG1500_PATCH, HG1501_PATCH, HG1502_PATCH, HG151_NOVEL_TEST, HG1591_PATCH, HG1592_PATCH, HG1595_PATCH, HG1699_PATCH, HG174_HG254_PATCH, HG183_PATCH, HG185_PATCH, HG186_PATCH, HG193_PATCH, HG19_PATCH, HG237_PATCH, HG243_PATCH, HG256_PATCH, HG271_PATCH, HG27_PATCH, HG280_PATCH, HG281_PATCH, HG299_PATCH, HG29_PATCH, HG305_PATCH, HG306_PATCH, HG311_PATCH, HG325_PATCH, HG329_PATCH, HG339_PATCH, HG344_PATCH, HG348_PATCH, HG357_PATCH, HG375_PATCH, HG385_PATCH, HG388_HG400_PATCH, HG414_PATCH, HG417_PATCH, HG418_PATCH, HG444_PATCH, HG480_HG481_PATCH, HG497_PATCH, HG506_HG507_HG1000_PATCH, HG50_PATCH, HG531_PATCH, HG536_PATCH, HG544_PATCH, HG686_PATCH, HG706_PATCH, HG729_PATCH, HG730_PATCH, HG736_PATCH, HG745_PATCH, HG747_PATCH, HG748_PATCH, HG75_PATCH, HG79_PATCH, HG7_PATCH, HG858_PATCH, HG865_PATCH, HG871_PATCH, HG873_PATCH, HG883_PATCH, HG905_PATCH, HG944_PATCH, HG946_PATCH, HG953_PATCH, HG957_PATCH, HG962_PATCH, HG971_PATCH, HG979_PATCH, HG987_PATCH, HG989_PATCH, HG990_PATCH, HG991_PATCH, HG996_PATCH, HG998_1_PATCH, HG998_2_PATCH, HG999_1_PATCH, HG999_2_PATCH, HSCHR10_1_CTG2, HSCHR10_1_CTG5, HSCHR12_1_CTG1, HSCHR12_1_CTG2_1, HSCHR12_1_CTG5, HSCHR12_2_CTG2, HSCHR12_2_CTG2_1, HSCHR12_3_CTG2_1, HSCHR15_1_CTG4, HSCHR15_1_CTG8, HSCHR16_1_CTG3_1, HSCHR16_2_CTG3_1, HSCHR17_1, HSCHR17_1_CTG1, HSCHR17_1_CTG4, HSCHR17_2_CTG4, HSCHR17_3_CTG4, HSCHR17_4_CTG4, HSCHR17_5_CTG4, HSCHR17_6_CTG4, HSCHR18_1_CTG1_1, HSCHR18_1_CTG2_1, HSCHR18_2_CTG2, HSCHR18_2_CTG2_1, HSCHR19LRC_COX1_CTG1, HSCHR19LRC_COX2_CTG1, HSCHR19LRC_LRC_I_CTG1, HSCHR19LRC_LRC_J_CTG1, HSCHR19LRC_LRC_S_CTG1, HSCHR19LRC_LRC_T_CTG1, HSCHR19LRC_PGF1_CTG1, HSCHR19LRC_PGF2_CTG1, HSCHR19_1_CTG3, HSCHR19_1_CTG3_1, HSCHR19_2_CTG3, HSCHR19_3_CTG3, HSCHR1_1_CTG31, HSCHR1_2_CTG31, HSCHR1_3_CTG31, HSCHR20_1_CTG1, HSCHR21_2_CTG1_1, HSCHR21_3_CTG1_1, HSCHR21_4_CTG1_1, HSCHR22_1_CTG1, HSCHR22_1_CTG2, HSCHR22_2_CTG1, HSCHR2_1_CTG1, HSCHR2_1_CTG12, HSCHR2_2_CTG12, HSCHR3_1_CTG1, HSCHR3_1_CTG2_1, HSCHR4_1, HSCHR4_1_CTG12, HSCHR4_1_CTG6, HSCHR4_2_CTG9, HSCHR5_1_CTG1, HSCHR5_1_CTG2, HSCHR5_1_CTG5, HSCHR5_2_CTG1, HSCHR5_3_CTG1, HSCHR6_1_CTG5, HSCHR6_MHC_APD, HSCHR6_MHC_COX, HSCHR6_MHC_DBB, HSCHR6_MHC_MANN, HSCHR6_MHC_MCF, HSCHR6_MHC_QBL, HSCHR6_MHC_SSTO, HSCHR7_1_CTG6, HSCHR9_1_CTG1, HSCHR9_1_CTG35, HSCHR9_2_CTG35, HSCHR9_3_CTG35, LRG_123, LRG_162, LRG_183, LRG_187, LRG_239, LRG_311, LRG_415, LRG_93, MT, X
##   Make sure to always combine/compare objects based on the same reference
##   genome (use suppressWarnings() to suppress this warning).
ol_down
## Hits object with 15 hits and 1 metadata column:
##        queryHits subjectHits |  distance
##        <integer>   <integer> | <integer>
##    [1]         1       63608 |    507789
##    [2]         2       34896 |      5855
##    [3]         3       21747 |         0
##    [4]         4       25758 |      8497
##    [5]         5       53674 |       719
##    ...       ...         ... .       ...
##   [11]        11       33717 |       186
##   [12]        12       34826 |         0
##   [13]        13       21486 |         0
##   [14]        14       43325 |         0
##   [15]        15       21747 |         0
##   -------
##   queryLength: 15 / subjectLength: 64102
gr_down$gene <- ensgenes[subjectHits(ol_down),"symbol"]$symbol
gr_down$distance <- elementMetadata(ol_down)
gr_down
## GRanges object with 15 ranges and 2 metadata columns:
##        seqnames              ranges strand |          gene    distance
##           <Rle>           <IRanges>  <Rle> |   <character> <DataFrame>
##    [1]        Y     2144901-2145000      * |    RNU6-1334P      507789
##    [2]       22   16693101-16693200      * |  LA16c-13E4.3        5855
##    [3]       17     1062401-1062500      * |           ABR           0
##    [4]       18   77380801-77380900      * | RP11-567M16.2        8497
##    [5]        9   44070601-44070700      * |    CR848007.2         719
##    ...      ...                 ...    ... .           ...         ...
##   [11]       20   44604901-44605000      * |         FTLP1         186
##   [12]       21   47268001-47268100      * |         PCBP3           0
##   [13]       16   83819801-83819900      * |         CDH13           0
##   [14]        5 107687601-107687700      * |        FBXL17           0
##   [15]       17     1062301-1062400      * |           ABR           0
##   -------
##   seqinfo: 16 sequences from an unspecified genome; no seqlengths

summary of DMRs in CBMCs

summary of DMRs in WBMCs